@@ -144,23 +144,16 @@ def IHMEHistoricalDeathsFile():
144144 writer .save ()
145145
146146
147- def FAOFBS ():
148- import requests
147+ def FAOFBS (write_csv_output = False ):
149148 import numpy as np
150- import matplotlib .pyplot as plt
151149 import pandas as pd
152- import csv
153- import xlrd
154- import matplotlib .lines as mlines
155- import matplotlib .transforms as mtransforms
156- import xlsxwriter
157- import dask .dataframe as dd
158- import statsmodels .api as sm
159150
160- Country_Concord = pd .read_csv ('input\CountryConcordFAO.csv' , encoding = "ISO-8859-1" )
161- concord_table = pd .read_csv ('input\Aggregation for crop type.csv' )
162- series_concord_table = pd .read_csv ('input\FAOSeriesConcordance.csv' )
163- data = pd .read_csv (r'input\FoodBalanceSheets_E_All_Data_(Normalized).csv' ,
151+
152+
153+ Country_Concord = pd .read_csv ('input\FAO\CountryConcordFAO.csv' , encoding = "ISO-8859-1" )
154+ concord_table = pd .read_csv ('input\FAO\Aggregation for crop type.csv' )
155+ series_concord_table = pd .read_csv ('input\FAO\FAOSeriesConcordance.csv' )
156+ data = pd .read_csv (r'input\FAO\FoodBalanceSheets_E_All_Data_(Normalized).csv' ,
164157 encoding = "ISO-8859-1" ,chunksize = 100000 )
165158
166159 chunk_list = []
@@ -184,27 +177,13 @@ def FAOFBS():
184177 data = pd .DataFrame (data .to_records ())
185178 data .columns = [hdr .replace ("('sum', 'Value'," , "" ).replace (")" , "" ).replace ("'" , "" ) \
186179 for hdr in data .columns ]
187- return ( data )
180+ data = data . rename ( columns = { "Area" : "CountryName" } )
188181
182+ if write_csv_output == True :
183+ data .to_csv ('output\CSV\FAOFBS.csv' )
189184
190- def FAOFBSFile ():
191- import requests
192- import numpy as np
193- import matplotlib .pyplot as plt
194- import pandas as pd
195- import csv
196- import xlrd
197- import matplotlib .lines as mlines
198- import matplotlib .transforms as mtransforms
199- import xlsxwriter
200- import dask .dataframe as dd
201- import statsmodels .api as sm
202-
203- p = FAOFBS ()
204- writer = pd .ExcelWriter ('FAOFBS2019.xlsx' , engine = 'xlsxwriter' )
205- p .to_excel (writer , sheet_name = 'FAOFBS' )
185+ return (data )
206186
207- writer .save ()
208187
209188
210189def FAOFBSFish ():
@@ -217,7 +196,6 @@ def FAOFBSFish():
217196 import matplotlib .lines as mlines
218197 import matplotlib .transforms as mtransforms
219198 import xlsxwriter
220- import dask .dataframe as dd
221199 import statsmodels .api as sm
222200
223201 Country_Concord = pd .read_csv ('input\CountryConcordFAO.csv' , encoding = "ISO-8859-1" )
@@ -264,7 +242,6 @@ def FAOFBSFishFile():
264242 import matplotlib .lines as mlines
265243 import matplotlib .transforms as mtransforms
266244 import xlsxwriter
267- import dask .dataframe as dd
268245 import statsmodels .api as sm
269246
270247 p = FAOFBSFish ()
@@ -364,53 +341,33 @@ def UISDataFile():
364341
365342 writer .save ()
366343
367-
368- def AQUASTATData ():
369- import requests
344+ def AQUASTATData (write_csv_output = False ):
370345 import numpy as np
371- import matplotlib .pyplot as plt
372346 import pandas as pd
373- import csv
374- import xlrd
375- import matplotlib .lines as mlines
376- import matplotlib .transforms as mtransforms
377- import xlsxwriter
378- import dask .dataframe as dd
379- import statsmodels .api as sm
380347
381- data = pd .read_excel ( 'input\AQUASTAT.xlsx' )
348+ data = pd .read_csv ( r 'input\AQUASTAT\AQUASTAT.csv' , encoding = "ISO-8859-1" )
382349
383- country_concordance = pd .read_excel ( 'input\CountryConcordanceAQUASTAT.xlsx' )
350+ country_concordance = pd .read_csv ( r 'input\AQUASTAT\ CountryConcordanceAQUASTAT.csv' , encoding = "ISO-8859-1" )
384351
385352 data = pd .merge (data , country_concordance , how = "left" , left_on = "Area" , right_on = "Area name" )
386353
387- series_concordance = pd .read_excel ('input\SeriesConcordanceAQUASTAT.xlsx' )
388-
389- # print (series_concordance.head())
390- # print(data.head())
354+ series_concordance = pd .read_csv (r'input\AQUASTAT\SeriesConcordanceAQUASTAT.csv' , encoding = "ISO-8859-1" )
391355
392356 data = pd .merge (data , series_concordance , how = "left" , left_on = "Variable Name" , right_on = "Series name in Aquastat" )
393- # print(data.head())
394- # print(data['Country Name in IFs'].unique)
357+
395358 data = data .drop (['Variable Id' , 'Area Id' , 'Symbol' , 'Md' ], axis = 1 )
396359
397360 data = data .dropna (how = 'any' )
398- data = pd .pivot_table (data , index = ['Country Name in IFs ' , 'Year' ], columns = ['Series name in IFs ' ], values = ['Value' ],
361+ data = pd .pivot_table (data , index = ['CountryName ' , 'Year' ], columns = ['SeriesName ' ], values = ['Value' ],
399362 aggfunc = [np .sum ])
400363 data = pd .DataFrame (data .to_records ())
401364 data .columns = [hdr .replace ("('sum', 'Value'," , "" ).replace (")" , "" ).replace ("'" , "" ) \
402365 for hdr in data .columns ]
403- return (data )
404-
405366
406- def AQUASTATDataFile ():
407- import pandas as pd
408- p = AQUASTATData ()
409- p = p .reset_index ()
410- writer = pd .ExcelWriter ('AQUASTAT.xlsx' , engine = 'xlsxwriter' )
411- p .to_excel (writer , sheet_name = '1' , merge_cells = False )
412- writer .save ()
367+ if write_csv_output == True :
368+ data .to_csv ('output/CSV/AQUASTATOutput.csv' )
413369
370+ return (data )
414371
415372def IMFGFSRevenueData ():
416373 import numpy as np
@@ -465,58 +422,33 @@ def IMFGFSRevenueDataFile():
465422 writer .save ()
466423
467424
468- def IMFGFSExpenditureData ():
469- import requests
425+ def IMFGFSExpenditureData (write_csv_output = False ):
470426 import numpy as np
471- import matplotlib .pyplot as plt
472427 import pandas as pd
473- import csv
474- import xlrd
475- import matplotlib .lines as mlines
476- import matplotlib .transforms as mtransforms
477- import xlsxwriter
478- import statsmodels .api as sm
479428
480- data = pd .read_csv ('input\GFSCOFOG_02-23-2018 23-30-06-28.csv' ,chunksize = 10000 )
481- concord_table = pd .read_excel ('input\CountryConcordanceIMF.xlsx' )
429+ data = pd .read_csv ('input\IMFGFS\ GFSCOFOG_02-23-2018 23-30-06-28.csv' ,chunksize = 10000 )
430+ concord_table = pd .read_csv ('input\IMFGFS\ CountryConcordanceIMF.csv' , encoding = "ISO-8859-1" )
482431 chunk_list = []
483432 for chunk in data :
484433 chunk ['FuncSector' ] = chunk [str ('COFOG Function Name' )] + chunk [str ('Sector Name' )]
485434 chunk = chunk .merge (concord_table , on = "Country Name" , how = 'left' )
486- chunk = chunk [chunk ['Country name in IFs ' ].notnull ()]
435+ chunk = chunk [chunk ['CountryName ' ].notnull ()]
487436 chunk_list .append (chunk )
488437
489438 data = pd .concat (chunk_list )
490439
491-
492-
493- data = pd .pivot_table (data , index = ["Country name in IFs" , "Unit Name" , 'Time Period' ], values = ['Value' ],
440+ data = pd .pivot_table (data , index = ["CountryName" , "Unit Name" , 'Time Period' ], values = ['Value' ],
494441 columns = ['FuncSector' ], aggfunc = [np .sum ])
495442 data = pd .DataFrame (data .to_records ())
496443 data .columns = [hdr .replace ("('sum', 'Value'," , "" ).replace (")" , "" ).replace ("'" , "" ) \
497444 for hdr in data .columns ]
498- print ("IMF GFS Expenditure data done" )
499- return (data )
500445
446+ data = data .rename (columns = {"Time Period" :"Year" })
501447
502- def IMFGFSExpenditureDataFile ():
503- import requests
504- import numpy as np
505- import matplotlib .pyplot as plt
506- import pandas as pd
507- import csv
508- import xlrd
509- import matplotlib .lines as mlines
510- import matplotlib .transforms as mtransforms
511- import xlsxwriter
512- import statsmodels .api as sm
513-
514- p = IMFGFSExpenditureData ()
515- writer = pd .ExcelWriter ('IMFGFSEXP.xls' , engine = 'xlsxwriter' )
516- p .to_excel (writer , sheet_name = 'EXP' )
517-
518- writer .save ()
448+ if write_csv_output == True :
449+ data .to_csv ('output\CSV\IMFGFSExpOutput.csv' )
519450
451+ return (data )
520452
521453def WDIData ():
522454 import numpy as np
@@ -618,4 +550,39 @@ def WDIDataFile():
618550 data = WDIData ()
619551 writer = pd .ExcelWriter ('WDISeries.xlsx' , engine = 'xlsxwriter' )
620552 data .to_excel (writer , sheet_name = 'WDIData' , merge_cells = False )
621- writer .save ()
553+ writer .save ()
554+
555+ #A function to write tables to SQL
556+ def Write_to_SQL (df ,con ):
557+ import pandas as pd
558+ import numpy as np
559+ df2 = df .drop (["CountryName" , "Year" ], axis = 1 )
560+ for col in (df2 ):
561+ data = df [["CountryName" , "Year" , str (col )]]
562+ data1 = pd .pivot_table (data , values = [str (col )], index = ["CountryName" ], columns = ["Year" ],
563+ aggfunc = [np .sum ])
564+
565+ # Get Most Recent Year
566+ dataRecent = data .dropna ()
567+ dataRecent = dataRecent .sort_values (["CountryName" , "Year" ])
568+ dataRecent = dataRecent .drop_duplicates ("CountryName" , keep = 'last' )
569+ dataRecent .reset_index ()
570+ dataRecent .columns = ["CountryName" , "MostRecentYear" , "MostRecentValue" ]
571+
572+ # Get Earliest Year
573+ dataEarliest = data .dropna ()
574+ dataEarliest = dataEarliest .sort_values (["CountryName" , "Year" ])
575+ dataEarliest = dataEarliest .drop_duplicates ("CountryName" , keep = 'first' )
576+ dataEarliest .reset_index ()
577+ dataEarliest .columns = ["CountryName" , "EarliestYear" , "EarliestValue" ]
578+
579+ data1 = pd .DataFrame (data1 .to_records ())
580+ val = "('sum', '" + str (col ) + "', "
581+ data1 .columns = [hdr .replace (val , "" ).replace (")" , "" ).replace ("'" , "" ) \
582+ for hdr in data1 .columns ]
583+
584+ data1 = pd .merge (data1 , dataRecent , how = 'left' , left_on = "CountryName" , right_on = "CountryName" )
585+ data1 = pd .merge (data1 , dataEarliest , how = 'left' , left_on = "CountryName" , right_on = "CountryName" )
586+
587+
588+ data1 .to_sql (name = str ("Series" ).strip () + str (col ).strip (), con = con , if_exists = "replace" , index = False )
0 commit comments