# Data Ingestion and Data Wrangling Part I

Georgetown University School of Continuing Studies 
Georgetown Data Analytics 
Capstone 10: Team Agronomics 

#### Data Sources: 

- Crop (Almond) Data
   - https://www.nass.usda.gov/Statistics_by_State/California/Publications/AgComm/Detail/
   - https://www.nass.usda.gov/Statistics_by_State/California/Publications/Fruits_and_Nuts/2017/201704almac.pdf
   
   
- Precipitation and Temperature 
   - http://www.wrh.noaa.gov/hnx/fatmain.php


- Census Tracts
  -  https://fred.stlouisfed.org/

In [49]:
import os
import pandas as pd
import csv
import numpy as np
from functools import reduce
from pandas import DataFrame, Series 
import glob

In [50]:
import warnings
warnings.filterwarnings('ignore')

Combining crop data for 1980 to 2015 into one dataframe. The original crop data files were initially named to have the same file ending "cropyear_production.csv"

In [51]:
# Taking ALL the csv files for crop production. This was manually put in one folder by project creators 

path =r'csv_data/'
filenames = glob.glob(path + "/*cropyear_production.csv")

#Creates an empty list for a dataframe (sets up data frame for combining all crop data files into one dataframe)
dfs = []
for filename in filenames:
    dfs.append(pd.read_csv(filename))
    
# Concatenate all data into one DataFrame (within the empty list)
big_frame = pd.concat(dfs, ignore_index=True)



In [52]:
path =r'csv_data/'
filenames = glob.glob(path + "/*cropyear_production.csv")

dfs = []
for filename in filenames:
    dfs.append(pd.read_csv(filename))
# Concatenate all data into one DataFrame
big_frame = pd.concat(dfs, ignore_index=True)

In [53]:
# Create a csv file named CROP_PRODUCTION_ALL for the data from big_frame
with open('CROP_PRODUCTION_ALL.csv', 'w') as f:
    big_frame.to_csv(f, header=False,index=0)


In [54]:
# Create column names 
colnames=['Year','Commodity_Code','Crop_Name','County_Code','County','Harvested','Yield','Production','Price','Unit','Value'] 

# Adds the column names to the dataframe 
crop_production=pd.read_csv('CROP_PRODUCTION_ALL.csv', names=colnames, header=None,dtype=object)

print(crop_production.shape)

crop_production.head()

(95257, 11)


Unnamed: 0,Year,Commodity_Code,Crop_Name,County_Code,County,Harvested,Yield,Production,Price,Unit,Value
0,1980,268099,ALMOND HULLS,7,Butte,,,30000,59,TONS,1770000
1,1980,268099,ALMOND HULLS,11,Colusa,,,5250,60,TONS,315000
2,1980,268099,ALMOND HULLS,19,Fresno,,,23700,75,TONS,1778000
3,1980,268099,ALMOND HULLS,29,Kern,,,88600,77,TONS,6811000
4,1980,268099,ALMOND HULLS,31,Kings,,,4216,50,TONS,210800


## Creating files for California counties of interest: 
1. Fresno County 
2. Kern County 

In [55]:
# Create new data frame for Fresno County 

crop_production_Fresno=crop_production[crop_production['County'].dropna().str.contains("Fresno") & crop_production['Crop_Name'].dropna().str.contains("ALMONDS ALL")]

#Makes all counties named Fresno to remove manual error (from original dataset). This makes the data consistent. 
crop_production_Fresno['County']='Fresno'

#crop_production_Fresno.loc[:,'County']='Fresno'

In [56]:
crop_production_Fresno.head()

Unnamed: 0,Year,Commodity_Code,Crop_Name,County_Code,County,Harvested,Yield,Production,Price,Unit,Value
18,1980,261999,ALMONDS ALL,19,Fresno,23992,0.59,14200,3000,TONS,42600000
2535,1981,261999,ALMONDS ALL,19,Fresno,23820,0.75,17900,1500,TONS,26850000
5125,1982,261999,ALMONDS ALL,19,Fresno,23642,0.69,16300,1800,TONS,29340000
7762,1983,261999,ALMONDS ALL,19,Fresno,24301,0.48,11700,2300,TONS,26910000
10348,1984,261999,ALMONDS ALL,19,Fresno,26300,0.94,24700,1800,TONS,44460000


In [57]:
#Create new data frame for Kern County 

crop_production_Kern=crop_production[crop_production['County'].dropna().str.contains("Kern") & crop_production['Crop_Name'].dropna().str.contains("ALMONDS ALL")]

#Makes all counties named Kern to remove manual error ( from original dataset). This makes the data consistent. 
crop_production_Kern['County']='Kern'

#crop_production_Kern.loc[:,'County']='Kern'


In [58]:
# Confirm dataframe 
with open('1-CROP_PRODUCTION-Fresno.csv', 'w') as f:
    
    crop_production_Fresno.to_csv(f, header=True,index=0)
f.close()

In [59]:
# Confirm dataframe 
with open('1-CROP_PRODUCTION-Kern.csv', 'w') as f:
    
    crop_production_Kern.to_csv(f, header=True,index=0)
f.close()

In [60]:
# Define columns for analysis 

f_crop_f=pd.read_csv('1-CROP_PRODUCTION-Fresno.csv',usecols=['Year','Commodity_Code','County_Code','County','Harvested','Yield','Production','Price','Value'],dtype={'Year':int,'Harvested':int,'Production':int,'Price':float})
df_crop_k=pd.read_csv('1-CROP_PRODUCTION-Kern.csv',usecols=['Year','Commodity_Code','County_Code','County','Harvested','Yield','Production','Price','Value'],dtype={'Year':int,'Harvested':int,'Production':int,'Price':float})

In [61]:
f_crop_f.head()

Unnamed: 0,Year,Commodity_Code,County_Code,County,Harvested,Yield,Production,Price,Value
0,1980,261999,19,Fresno,23992,0.59,14200,3000.0,42600000
1,1981,261999,19,Fresno,23820,0.75,17900,1500.0,26850000
2,1982,261999,19,Fresno,23642,0.69,16300,1800.0,29340000
3,1983,261999,19,Fresno,24301,0.48,11700,2300.0,26910000
4,1984,261999,19,Fresno,26300,0.94,24700,1800.0,44460000


In [62]:
df_crop_k.head()

Unnamed: 0,Year,Commodity_Code,County_Code,County,Harvested,Yield,Production,Price,Value
0,1980,261999,29,Kern,61040,0.8,49100,2860.0,140426000
1,1981,261999,29,Kern,61592,0.65,40200,1461.0,58740000
2,1982,261999,29,Kern,61247,0.7,43000,1699.0,73051000
3,1983,261999,29,Kern,66804,0.61,40500,2301.0,93191000
4,1984,261999,29,Kern,70946,0.92,65400,1459.0,95431000


# Precipitation  and Temperature Data 

Precipiation: Years 1980 to 2016
Temperature: Years 1980 to 2016 

In [63]:
# Fresno County Annual Precipitation (inches)

df_prec_f=pd.read_csv("f_temp_prec/Fresno_Rainfall_Yearly.csv")
df_prec_f.rename(columns={
                 'January': 'January_p',
                 'February': 'February_p',
                 'March': 'March_p',
                 'April': 'April_p',
                 'May': 'May_p',
                 'June': 'June_p',
                 'July': 'July_p',
                 'August': 'August_p',
                 'September': 'September_p',
                 'October': 'October_p',
                 'November': 'November_p',
                 'December': 'December_p',
                 'Total': 'Total_p'}, inplace=True)

df_prec_f.head()

Unnamed: 0,Year,January_p,February_p,March_p,April_p,May_p,June_p,July_p,August_p,September_p,October_p,November_p,December_p,Total_p
0,Norm,2.19,2.03,2.03,0.95,0.43,0.21,0.01,0.01,0.17,0.63,1.07,1.77,11.5
1,1878,3.2,1.78,1.91,0.78,T,0.00,0.00,0.00,0.0,0.2,0.56,0.22,8.65
2,1879,1.28,0.56,0.66,1.33,0.06,T,0.00,0.00,0.0,0.55,0.48,1.67,6.59
3,1880,0.46,2.54,0.61,1.97,0.15,0.00,0.00,0.00,0.0,0.0,0.44,3.05,9.22
4,1881,2.21,0.87,0.55,1.0,0.10,0.00,T,T,0.46,0.36,0.27,0.16,5.98


In [64]:
#The precipitation data includes values set as 'T' for Trace. 
#Trace are precipitation measurments where precipitation occured but did not measure atleast 0.01 inches. 
# The code below gives a random number between 0.001 and 0.005 for Trace values 

df_prec_f.replace(['T'],np.random.uniform(+0.001,+0.005), inplace=True)
df_prec1_f=df_prec_f[(df_prec_f['Year']>='1980') & (df_prec_f['Year']<'2016')]
df_prec1_f = df_prec1_f.astype(float)

In [65]:
with open('2-RAINFALL_FRESNO.csv', 'w') as f:
    df_prec1_f.to_csv(f, header=True,index=0,float_format='%.3f') #take 3 decimals places 
f.close()



In [66]:
df_prec1_f.head()

Unnamed: 0,Year,January_p,February_p,March_p,April_p,May_p,June_p,July_p,August_p,September_p,October_p,November_p,December_p,Total_p
103,1980.0,3.83,3.3,2.05,0.25,0.18,0.004078,0.01,0.0,0.0,0.03,0.14,0.49,10.28
104,1981.0,2.67,1.29,2.59,1.01,0.004078,0.0,0.0,0.0,0.0,0.58,1.22,0.65,10.01
105,1982.0,2.11,0.58,4.76,0.89,0.0,0.31,0.0,0.004078,1.1,1.58,3.16,1.59,16.08
106,1983.0,5.14,3.7,4.53,2.76,0.01,0.0,0.0,0.09,1.03,0.09,2.51,1.75,21.61
107,1984.0,0.15,1.05,0.48,0.25,0.02,0.2,0.004078,0.004078,0.0,0.7,1.94,1.98,6.77


In [67]:
#Kern County 

df_prec_k=pd.read_csv("k_temp_prec/Kern_Rainfall_Yearly.csv")
df_prec_k.columns=['Year','January_p','February_p','March_p','April_p','May_p','June_p','July_p','August_p','September_p','October_p','November_p','December_p','Total_p']

In [68]:
df_prec_k.replace(['T'],np.random.uniform(+0.001,+0.005), inplace=True)
df_prec_k = df_prec_k.astype(float)
# data needs to be cleaned and consistent 
df_prec_k.replace(r'[\-]+', r'', regex=True, inplace=True)
df_prec1_k=df_prec_k[(df_prec_k['Year']>=1980) & (df_prec_k['Year']<2016)]
df_prec1_k = df_prec_k.astype(float)

In [69]:
with open('2-RAINFALL_KERN.csv', 'w') as f:
    #df_prec1_k = df_prec1_k.astype(float)
    df_prec1_k.to_csv(f, header=True,index=0,float_format='%.3f')
f.close()

df_prec1_k.head()

Unnamed: 0,Year,January_p,February_p,March_p,April_p,May_p,June_p,July_p,August_p,September_p,October_p,November_p,December_p,Total_p
0,1889.0,0.57,0.2,1.88,0.15,0.22,0.0,0.0,0.0,0.0,2.04,0.22,1.75,7.03
1,1890.0,1.2,0.16,0.24,0.0,0.06,0.0,0.03,0.47,0.0,0.0,0.0,1.34,3.5
2,1891.0,0.2,1.2,0.25,0.27,0.22,0.02,0.0,0.0,0.12,0.0,0.2,1.08,3.56
3,1892.0,1.61,0.45,1.25,0.00187,0.41,0.39,0.0,0.0,0.0,0.0,0.55,0.75,5.41
4,1893.0,0.61,0.88,2.3,0.32,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.97,5.28


In [70]:
# Temperature (degrees Fahrenhiet)

#Fresno County 

df_temp_f=pd.read_csv("f_temp_prec/Fresno_Average_Monthly_Temperatures.csv")
df_temp_f.rename(columns={
                 'January': 'January_t',
                 'February': 'February_t',
                 'March': 'March_t',
                 'April': 'April_t',
                 'May': 'May_t',
                 'June': 'June_t',
                 'July': 'July_t',
                 'August': 'August_t',
                 'September': 'September_t',
                 'October': 'October_t',
                 'November': 'November_t',
                 'December': 'December_t',
                 'Annual': 'Annual_t'}, inplace=True)

df_temp_f.head()

Unnamed: 0,Year,January_t,February_t,March_t,April_t,May_t,June_t,July_t,August_t,September_t,October_t,November_t,December_t,Annual_t
0,1887,-,-,-,-,-,-,-,-,74.6,67.7,55.6,46.0,-
1,1888,44.1,53,53.6,66.1,68,73.4,81.7,83,80.7,66.5,54.3,48.2,64.4
2,1889,43.8,50.5,58.4,63.5,69.6,79.5,82.6,82.2,75.6,62.8,54.1,49.1,64.3
3,1890,42.3,47.2,54.6,61.2,69.4,73.4,82.5,80.8,74.6,64.5,56.9,43.8,62.6
4,1891,45.4,48.5,54.4,59,67.1,73,83.6,83.6,74.6,67.0,56.2,43.9,63


In [71]:
df_temp1_f=df_temp_f[(df_temp_f['Year']>=1980) & (df_temp_f['Year']<2016)]
df_temp1_f = df_temp1_f.astype(float)

In [72]:
df_temp1_f.head()

Unnamed: 0,Year,January_t,February_t,March_t,April_t,May_t,June_t,July_t,August_t,September_t,October_t,November_t,December_t,Annual_t
93,1980.0,49.4,53.8,53.7,61.8,67.2,73.7,84.0,80.7,75.6,68.4,54.2,46.8,64.1
94,1981.0,47.9,52.0,54.5,63.2,70.9,82.8,84.9,82.9,76.5,61.4,55.4,47.7,65.0
95,1982.0,41.7,50.5,51.4,58.0,69.3,72.9,81.0,80.4,72.3,65.0,51.1,45.4,61.6
96,1983.0,45.2,53.1,55.9,57.9,69.7,76.3,79.0,82.1,78.8,68.5,54.6,51.1,64.4
97,1984.0,47.8,50.7,58.4,60.8,74.8,77.5,87.0,83.5,81.0,62.4,53.6,46.5,65.3


In [73]:
#Kern County 

df_temp_k=pd.read_csv("k_temp_prec/Kern_Average_Temperatures.csv")
df_temp_k.rename(columns={
                 'January': 'January_t',
                 'February': 'February_t',
                 'March': 'March_t',
                 'April': 'April_t',
                 'May': 'May_t',
                 'June': 'June_t',
                 'July': 'July_t',
                 'August': 'August_t',
                 'September': 'September_t',
                 'October': 'October_t',
                 'November': 'November_t',
                 'December': 'December_t',
                 'Annual': 'Annual_t'}, inplace=True)

In [74]:
df_temp_k.columns=['Year','January_t','February_t','March_t','April_t','May_t','June_t','July_t','August_t','September_t','October_t','November_t','December_t','Annual_t']
df_temp_k.replace(r'[\-]+', r'', regex=True, inplace=True)
df_temp1_k=df_temp_k[(df_temp_k['Year']>=1980) & (df_temp_k['Year']<2016)]

# Census Data 
# Insert data source links 
Year: 1980 - 2015

## Fresno County Census 

In [75]:
f_census1=pd.read_csv("census_data/fresno_percapita_personal_income.csv")
f_census2=pd.read_csv("census_data/fresno_personal_income.csv")
f_census3=pd.read_csv("census_data/fresno-resident_population.csv")
f_census4=pd.read_csv("census_data/fresno_house_price_index.csv")

In [76]:
f_census_df=[f_census1,f_census2,f_census3,f_census4]
fresno_census_concat = reduce(lambda  left,right: pd.merge(left,right,on='DATE', how='left'), f_census_df)
fresno_census_concat['DATE'] = fresno_census_concat['DATE'].map(lambda x: str(x)[:-6])
with open('3-FRESNO_CENSUS_DATA.csv', 'w') as f:
    fresno_census_concat.to_csv(f, header=True,index=0)


In [77]:
col_census=['Year','Percapita_Personal_Income','Personal_Income','Resident_Population','House_Price_Index']
fresno_census=pd.read_csv('3-FRESNO_CENSUS_DATA.csv', names=col_census)
fresno_census=fresno_census[(fresno_census['Year']>='1980') & (fresno_census['Year']<'2016')]
fresno_census = fresno_census.astype(float)

In [78]:
fresno_census_concat.head()

Unnamed: 0,DATE,PCPI06019,PI06019,CAFRES9POP,ATNHPIUS06019A
0,1969,3675,1500506,,
1,1970,3983,1652657,413.329,
2,1971,4237,1799513,424.7,
3,1972,4713,2035429,431.9,
4,1973,5311,2336200,439.9,


In [79]:
fresno_census.head()


Unnamed: 0,Year,Percapita_Personal_Income,Personal_Income,Resident_Population,House_Price_Index
12,1980.0,10682.0,5529740.0,514.621,63.37
13,1981.0,10946.0,5789241.0,528.891,68.04
14,1982.0,11243.0,6087889.0,541.5,70.15
15,1983.0,11532.0,6410514.0,555.873,66.27
16,1984.0,12393.0,7089714.0,572.091,66.82


## Kern County Census 

In [80]:
k_census1=pd.read_csv("census_data/kern_percapita_personal_income.csv")
k_census2=pd.read_csv("census_data/kern_personal_income.csv")
k_census3=pd.read_csv("census_data/kern_resident_population.csv")
k_census4=pd.read_csv("census_data/kern_house_price_index.csv")

In [81]:
# Merge all four census data files into one file 
k_census_df=[k_census1,k_census2,k_census3,k_census4]
kern_census_concat = reduce(lambda  left,right: pd.merge(left,right,on='DATE', how='left'), k_census_df)
kern_census_concat['DATE'] = kern_census_concat['DATE'].map(lambda x: str(x)[:-6])

kern_census_concat.rename(columns={
                 'DATE': 'Year',
                 'PCPI06029': 'Percapita_Personal_Income',
                 'PI06029': 'Personal_Income',
                 'CAKERN0POP': 'Resident_Population',
                 'ATNHPIUS06029A': 'House_Price_Index'}, inplace=True)

In [82]:
k_census1=pd.read_csv("census_data/kern_percapita_personal_income.csv")
k_census2=pd.read_csv("census_data/kern_personal_income.csv")
k_census3=pd.read_csv("census_data/kern_resident_population.csv")
k_census4=pd.read_csv("census_data/kern_house_price_index.csv")

In [83]:
# Merge all four census data files into one file 

k_census_df=[k_census1,k_census2,k_census3,k_census4]
kern_census_concat = reduce(lambda  left,right: pd.merge(left,right,on='DATE', how='left'), k_census_df)
kern_census_concat['DATE'] = kern_census_concat['DATE'].map(lambda x: str(x)[:-6])

kern_census_concat.rename(columns={
                 'DATE': 'Year',
                 'PCPI06029': 'Percapita_Personal_Income',
                 'PI06029': 'Personal_Income',
                 'CAKERN0POP': 'Resident_Population',
                 'ATNHPIUS06029A': 'House_Price_Index'}, inplace=True)

In [84]:
with open('3-KERN_CENSUS_DATA.csv', 'w') as f:
    kern_census_concat.to_csv(f, header=True,index=0)

In [85]:
kern_census=pd.read_csv('3-KERN_CENSUS_DATA.csv')

In [86]:
kern_census1=kern_census[(kern_census['Year']>=1980) & (kern_census['Year']<2016)]

In [87]:
with open('4-TEMPERATURES_FRESNO.csv', 'w') as f:
    df_temp1_k = df_temp1_k.astype(float)

    df_temp1_f.to_csv(f, header=True,index=0)
f.close()

In [88]:
with open('4-TEMPERATURES_KERN.csv', 'w') as f:
    df_temp1_k.to_csv(f, header=True,index=0)
f.close()

# Combine Crop Production, Weather, and Census Data 

## Fresno County Combined File 

In [89]:
fresno_data_frames=[f_crop_f,df_prec1_f,df_temp1_f,fresno_census]

df_concat_fresno = reduce(lambda  left,right: pd.merge(left,right,on='Year', how='left'), fresno_data_frames)

In [90]:
df_concat_fresno.head()

Unnamed: 0,Year,Commodity_Code,County_Code,County,Harvested,Yield,Production,Price,Value,January_p,...,August_t,September_t,October_t,November_t,December_t,Annual_t,Percapita_Personal_Income,Personal_Income,Resident_Population,House_Price_Index
0,1980,261999,19,Fresno,23992,0.59,14200,3000.0,42600000,3.83,...,80.7,75.6,68.4,54.2,46.8,64.1,10682.0,5529740.0,514.621,63.37
1,1981,261999,19,Fresno,23820,0.75,17900,1500.0,26850000,2.67,...,82.9,76.5,61.4,55.4,47.7,65.0,10946.0,5789241.0,528.891,68.04
2,1982,261999,19,Fresno,23642,0.69,16300,1800.0,29340000,2.11,...,80.4,72.3,65.0,51.1,45.4,61.6,11243.0,6087889.0,541.5,70.15
3,1983,261999,19,Fresno,24301,0.48,11700,2300.0,26910000,5.14,...,82.1,78.8,68.5,54.6,51.1,64.4,11532.0,6410514.0,555.873,66.27
4,1984,261999,19,Fresno,26300,0.94,24700,1800.0,44460000,0.15,...,83.5,81.0,62.4,53.6,46.5,65.3,12393.0,7089714.0,572.091,66.82


## Kern County Combined File 

In [91]:
kern_data_frames=[df_crop_k,df_prec1_k,df_temp1_k,kern_census]

df_concat_kern = reduce(lambda  left,right: pd.merge(left,right,on='Year', how='left'), kern_data_frames)

In [92]:
df_concat_kern.head()

Unnamed: 0,Year,Commodity_Code,County_Code,County,Harvested,Yield,Production,Price,Value,January_p,...,August_t,September_t,October_t,November_t,December_t,Annual_t,Percapita_Personal_Income,Personal_Income,Resident_Population,House_Price_Index
0,1980,261999,29,Kern,61040,0.8,49100,2860.0,140426000,2.6,...,82.6,77.4,71.6,57.6,50.1,66.0,11043,4488004,403.089,65.67
1,1981,261999,29,Kern,61592,0.65,40200,1461.0,58740000,0.93,...,85.0,80.3,65.0,59.6,51.4,67.8,11470,4804726,418.877,71.38
2,1982,261999,29,Kern,61247,0.7,43000,1699.0,73051000,0.53,...,84.8,77.0,68.7,52.1,46.4,66.2,12048,5225068,433.698,75.38
3,1983,261999,29,Kern,66804,0.61,40500,2301.0,93191000,2.21,...,82.9,79.8,69.0,57.2,50.6,64.7,12177,5442616,446.961,76.5
4,1984,261999,29,Kern,70946,0.92,65400,1459.0,95431000,0.05,...,82.4,80.2,61.6,54.5,47.2,64.1,13070,6004689,459.411,77.84


In [93]:
#Merged all csv files: Crop, weather, and censu
with open('5-MERGE.csv', 'w') as f:
    df_concat_fresno.to_csv(f, header=True,index=0)
    df_concat_kern.to_csv(f, header=False,index=0)
f.close()

# Almond Production Growing Season 

Defined growing seasons and added as new columns to 5-Merge dataframe. 
Precipiation is sum of growing season months (Feburary - June)
Temperature is the adverage temperature for growing season months (Feburary- June)

In [94]:
# Added new columns based on the months for growing seasons

merge_almond_production=pd.read_csv("5-MERGE.csv")
merge_almond_production.insert(9, 'Grow_total_p', (merge_almond_production['February_p']+merge_almond_production['March_p']+merge_almond_production['April_p']+merge_almond_production['May_p']+merge_almond_production['June_p']))
merge_almond_production.insert(10, 'Grow_avg_t', (merge_almond_production['February_t']+merge_almond_production['March_t']+merge_almond_production['April_t']+merge_almond_production['May_t']+merge_almond_production['June_t'])/5)

In [95]:
with open('5-MERGE.csv', 'w') as f:
    merge_almond_production.to_csv(f, header=True,index=0)

f.close()
print(merge_almond_production.shape)

(72, 41)


In [96]:
print(merge_almond_production.columns)

Index(['Year', 'Commodity_Code', 'County_Code', 'County', 'Harvested', 'Yield',
       'Production', 'Price', 'Value', 'Grow_total_p', 'Grow_avg_t',
       'January_p', 'February_p', 'March_p', 'April_p', 'May_p', 'June_p',
       'July_p', 'August_p', 'September_p', 'October_p', 'November_p',
       'December_p', 'Total_p', 'January_t', 'February_t', 'March_t',
       'April_t', 'May_t', 'June_t', 'July_t', 'August_t', 'September_t',
       'October_t', 'November_t', 'December_t', 'Annual_t',
       'Percapita_Personal_Income', 'Personal_Income', 'Resident_Population',
       'House_Price_Index'],
      dtype='object')


In [97]:
pd.read_csv('5-MERGE.csv', names=colnames, header=None,dtype=object)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Year,Commodity_Code,Crop_Name,County_Code,County,Harvested,Yield,Production,Price,Unit,Value
Year,Commodity_Code,County_Code,County,Harvested,Yield,Production,Price,Value,Grow_total_p,Grow_avg_t,January_p,February_p,March_p,April_p,May_p,June_p,July_p,August_p,September_p,October_p,November_p,December_p,Total_p,January_t,February_t,March_t,April_t,May_t,June_t,July_t,August_t,September_t,October_t,November_t,December_t,Annual_t,Percapita_Personal_Income,Personal_Income,Resident_Population,House_Price_Index
1980,261999,19,Fresno,23992,0.59,14200,3000.0,42600000,5.784077951858269,62.04,3.83,3.3,2.05,0.25,0.18,0.004077951858269339,0.01,0.0,0.0,0.03,0.14,0.49,10.28,49.4,53.8,53.7,61.8,67.2,73.7,84.0,80.7,75.6,68.4,54.2,46.8,64.1,10682.0,5529740.0,514.621,63.37
1981,261999,19,Fresno,23820,0.75,17900,1500.0,26850000,4.894077951858269,64.67999999999999,2.67,1.29,2.59,1.01,0.004077951858269339,0.0,0.0,0.0,0.0,0.58,1.22,0.65,10.01,47.9,52.0,54.5,63.2,70.9,82.8,84.9,82.9,76.5,61.4,55.4,47.7,65.0,10946.0,5789241.0,528.891,68.04
1982,261999,19,Fresno,23642,0.69,16300,1800.0,29340000,6.539999999999999,60.42,2.11,0.58,4.76,0.89,0.0,0.31,0.0,0.004077951858269339,1.1,1.58,3.16,1.59,16.08,41.7,50.5,51.4,58.0,69.3,72.9,81.0,80.4,72.3,65.0,51.1,45.4,61.6,11243.0,6087889.0,541.5,70.15
1983,261999,19,Fresno,24301,0.48,11700,2300.0,26910000,11.0,62.580000000000005,5.14,3.7,4.53,2.76,0.01,0.0,0.0,0.09,1.03,0.09,2.51,1.75,21.61,45.2,53.1,55.9,57.9,69.7,76.3,79.0,82.1,78.8,68.5,54.6,51.1,64.4,11532.0,6410514.0,555.873,66.27
1984,261999,19,Fresno,26300,0.94,24700,1800.0,44460000,2.0,64.44,0.15,1.05,0.48,0.25,0.02,0.2,0.004077951858269339,0.004077951858269339,0.0,0.7,1.94,1.98,6.77,47.8,50.7,58.4,60.8,74.8,77.5,87.0,83.5,81.0,62.4,53.6,46.5,65.3,12393.0,7089714.0,572.091,66.82
1985,261999,19,Fresno,31156,0.6,18700,1300.0,24310000,2.89,64.56,0.43,0.71,1.73,0.12,0.0,0.33,0.04,0.02,0.43,0.85,3.02,0.72,8.4,43.3,51.3,53.1,67.2,69.4,81.8,86.0,80.5,72.3,65.0,52.5,43.8,63.8,13037.0,7614794.0,584.07,67.53
1986,261999,19,Fresno,30685,0.4,12300,4000.0,49200000,7.6000000000000005,65.85999999999999,2.12,3.66,3.42,0.36,0.16,0.0,0.004077951858269339,0.0,0.38,0.0,0.01,2.3,12.41,53.6,55.7,60.3,62.7,71.2,79.4,81.9,84.2,71.3,66.9,56.7,47.5,66.0,13700.0,8132396.0,593.621,69.41
1987,261999,19,Fresno,30648,0.96,29400,2000.0,58800000,4.699999999999999,65.06000000000002,1.93,1.36,2.39,0.07,0.87,0.01,0.0,0.0,0.004077951858269339,0.85,0.52,1.19,9.19,45.3,52.8,55.6,66.7,71.8,78.4,77.0,80.2,75.5,70.1,52.3,44.2,64.2,14605.0,8891993.0,608.831,71.09
1988,261999,19,Fresno,30667,0.94,28800,2000.0,57600000,3.99,62.64,1.52,0.83,0.27,2.41,0.45,0.03,0.0,0.0,0.0,0.0,1.42,2.46,9.39,46.0,52.2,56.8,61.6,67.0,75.6,85.5,81.2,76.4,68.7,54.3,44.5,64.2,15213.0,9548499.0,627.658,72.28
