# Dependências

In [1]:
import pandas as pd
import numpy as np
from glob import glob

# Dados

### Dados de Treinamento 

In [2]:
data_train = pd.read_csv("./data/train.csv", sep=',')
data_train.head(5)

Unnamed: 0,Id,field,age,type,harvest_year,harvest_month,production
0,0,0,19,5,2004,1,0.064071
1,1,0,19,5,2004,2,0.047658
2,2,0,19,5,2004,3,0.016866
3,3,0,19,5,2004,4,0.025525
4,4,0,19,5,2004,5,0.04769



## Treinamento + Fields

In [3]:
fields = [ field for field in glob('./data/field**.csv')]

In [4]:
data_fields = []
for field in fields:
    field_ = pd.read_csv(field, sep=',')
    field_['field'] = int(field.split('-')[1].split('.')[0])
    data_fields.append(field_)
data_fields = pd.concat(data_fields)

In [5]:
data_train_fields = data_train.merge(data_fields, left_on=['field','harvest_year', 'harvest_month'], right_on=['field', 'year', 'month'])
data_train_fields.to_csv('data/data_train_fields.csv', sep=',', encoding='utf-8')
data_train_fields.head(5)

Unnamed: 0,Id,field,age,type,harvest_year,harvest_month,production,month,year,temperature,dewpoint,windspeed,Soilwater_L1,Soilwater_L2,Soilwater_L3,Soilwater_L4,Precipitation
0,0,0,19,5,2004,1,0.064071,1,2004,26.132,24.661,1.8766,0.35274,0.35192,0.34844,0.33385,360.91
1,1532,0,13,2,2004,1,0.283228,1,2004,26.132,24.661,1.8766,0.35274,0.35192,0.34844,0.33385,360.91
2,4204,0,4,5,2004,1,0.106263,1,2004,26.132,24.661,1.8766,0.35274,0.35192,0.34844,0.33385,360.91
3,1,0,19,5,2004,2,0.047658,2,2004,25.295,24.401,1.9206,0.36361,0.36376,0.36411,0.36357,484.67
4,1533,0,13,2,2004,2,0.182068,2,2004,25.295,24.401,1.9206,0.36361,0.36376,0.36411,0.36357,484.67


## Treinamento + Fields + Soil

In [6]:
data_soil = pd.read_csv("data/soil_data.csv", sep=',')

In [7]:
data_train_fields_soil = data_train_fields.merge(data_soil, on='field')
data_train_fields_soil.to_csv('data/data_train_fields_soil.csv', sep=',', encoding='utf-8')
data_train_fields_soil.head(5)

Unnamed: 0,Id,field,age,type,harvest_year,harvest_month,production,month,year,temperature,...,SLTPPT_sl5,SLTPPT_sl6,SLTPPT_sl7,SNDPPT_sl1,SNDPPT_sl2,SNDPPT_sl3,SNDPPT_sl4,SNDPPT_sl5,SNDPPT_sl6,SNDPPT_sl7
0,0,0,19,5,2004,1,0.064071,1,2004,26.132,...,22,22,23,44,45,44,39,38,37,36
1,1532,0,13,2,2004,1,0.283228,1,2004,26.132,...,22,22,23,44,45,44,39,38,37,36
2,4204,0,4,5,2004,1,0.106263,1,2004,26.132,...,22,22,23,44,45,44,39,38,37,36
3,1,0,19,5,2004,2,0.047658,2,2004,25.295,...,22,22,23,44,45,44,39,38,37,36
4,1533,0,13,2,2004,2,0.182068,2,2004,25.295,...,22,22,23,44,45,44,39,38,37,36


### Dados de testes 

In [8]:
data_test = pd.read_csv('data/test.csv', sep=',')
data_test.head(5)

Unnamed: 0,Id,field,age,type,harvest_year,harvest_month
0,5243,0,27,5,2012,1
1,5244,0,27,5,2012,2
2,5245,0,27,5,2012,3
3,5246,0,27,5,2012,4
4,5247,0,27,5,2012,5


### Testes + Fields

In [9]:
data_test_fields = data_train.merge(data_fields, left_on=['field','harvest_year', 'harvest_month'], right_on=['field', 'year', 'month'])
data_test_fields.to_csv('data/data_test_fields.csv', sep=',', encoding='utf-8')
data_test_fields.head(5)

Unnamed: 0,Id,field,age,type,harvest_year,harvest_month,production,month,year,temperature,dewpoint,windspeed,Soilwater_L1,Soilwater_L2,Soilwater_L3,Soilwater_L4,Precipitation
0,0,0,19,5,2004,1,0.064071,1,2004,26.132,24.661,1.8766,0.35274,0.35192,0.34844,0.33385,360.91
1,1532,0,13,2,2004,1,0.283228,1,2004,26.132,24.661,1.8766,0.35274,0.35192,0.34844,0.33385,360.91
2,4204,0,4,5,2004,1,0.106263,1,2004,26.132,24.661,1.8766,0.35274,0.35192,0.34844,0.33385,360.91
3,1,0,19,5,2004,2,0.047658,2,2004,25.295,24.401,1.9206,0.36361,0.36376,0.36411,0.36357,484.67
4,1533,0,13,2,2004,2,0.182068,2,2004,25.295,24.401,1.9206,0.36361,0.36376,0.36411,0.36357,484.67


### Teste + Fields + Soil

In [10]:
data_test_fields_soil = data_test_fields.merge(data_soil, on='field')
data_test_fields_soil.to_csv('data/data_test_fields_soil.csv', sep=',', encoding='utf-8')
data_test_fields_soil.head(5)

Unnamed: 0,Id,field,age,type,harvest_year,harvest_month,production,month,year,temperature,...,SLTPPT_sl5,SLTPPT_sl6,SLTPPT_sl7,SNDPPT_sl1,SNDPPT_sl2,SNDPPT_sl3,SNDPPT_sl4,SNDPPT_sl5,SNDPPT_sl6,SNDPPT_sl7
0,0,0,19,5,2004,1,0.064071,1,2004,26.132,...,22,22,23,44,45,44,39,38,37,36
1,1532,0,13,2,2004,1,0.283228,1,2004,26.132,...,22,22,23,44,45,44,39,38,37,36
2,4204,0,4,5,2004,1,0.106263,1,2004,26.132,...,22,22,23,44,45,44,39,38,37,36
3,1,0,19,5,2004,2,0.047658,2,2004,25.295,...,22,22,23,44,45,44,39,38,37,36
4,1533,0,13,2,2004,2,0.182068,2,2004,25.295,...,22,22,23,44,45,44,39,38,37,36
