In [1]:
import re
import sqlite3
from contextlib import closing

import numpy as np
import pandas as pd

In [30]:
# Build IFsImportwdi.csv: for every row of DataDict.csv, take the matching WDI
# indicator, apply the row's Formula and rounding, pivot to one column per year,
# attach Earliest/MostRecent, and stack the per-variable blocks into one table.
ID_COL = 'Country Name'
YEAR_COL = 'Year'
WDI_META_COLS = ['Country Code', 'Indicator Name', 'Indicator Code']


def melt_indicator(wdi, code, value_name):
    """Return one WDI indicator in long form.

    Parameters
    ----------
    wdi : DataFrame in the WDICSV layout (metadata columns + one column per year).
    code : value to match against the 'Indicator Code' column.
    value_name : name given to the melted value column.

    Returns
    -------
    DataFrame with columns 'Country Name', 'Year', <value_name>.
    """
    rows = wdi[wdi['Indicator Code'] == code]
    rows = rows.drop(WDI_META_COLS, axis=1)
    return rows.melt(id_vars=[ID_COL], var_name=YEAR_COL, value_name=value_name)


def as_percent_of(series_long, denominator_long, denominator_col):
    """Return series_long with 'val' re-expressed as a percentage of the denominator.

    The denominator is matched on country and year with a left join, so rows
    without a denominator keep their place and get NaN.
    """
    merged = series_long.merge(denominator_long, on=[ID_COL, YEAR_COL], how='left')
    merged['val'] = merged['val'] / merged[denominator_col] * 100
    return merged.drop(columns=[denominator_col])


dd = pd.read_csv('DataDict.csv')
df = pd.read_csv('WDICSV.csv')
country = pd.read_excel('WDI_Country.xlsx')

# Denominator series must be extracted before df is narrowed to the DataDict codes.
gdp = melt_indicator(df, 'NY.GDP.MKTP.CD', 'gdp')
imp = melt_indicator(df, 'NE.IMP.GNFS.CD', 'imp')
gdp_cn = melt_indicator(df, 'NY.GDP.MKTP.CN', 'gdp_cn')

df = df[df['Indicator Code'].isin(dd['Code in Source'])]

# Formula strings that name an explicit denominator series.
PERCENT_DENOMINATORS = {
    "%'NE.IMP.GNFS.CD'": (imp, 'imp'),
    "%'NY.GDP.MKTP.CN'": (gdp_cn, 'gdp_cn'),
}
# Formula strings that are a plain unit rescaling.
SCALE_DIVISORS = {
    '/1000000000': 1_000_000_000,
    '/1000000': 1_000_000,
}

vlist = []

for _, entry in dd.iterrows():
    v = entry['Variable']
    code = entry['Code in Source']
    f = entry['Formula']
    d = None if pd.isnull(entry['Decimal Places']) else int(entry['Decimal Places'])

    dt = melt_indicator(df, code, 'val')

    if pd.isnull(f):
        pass  # no formula: values are used as published
    elif f in SCALE_DIVISORS:
        dt['val'] = dt['val'] / SCALE_DIVISORS[f]
    else:
        # Any other non-empty Formula falls back to "percent of GDP (NY.GDP.MKTP.CD)".
        denominator, denominator_col = PERCENT_DENOMINATORS.get(f, (gdp, 'gdp'))
        dt = as_percent_of(dt, denominator, denominator_col)
        # A zero denominator yields +/-inf, which is not a meaningful percentage.
        dt['val'] = dt['val'].replace([np.inf, -np.inf], np.nan)

    if d is not None:
        dt['val'] = dt['val'].round(d)

    # Keep only rows whose Year label is numeric and store it as int, so the
    # pivoted column labels are 1960, 1961, ... and never 1960.0 (which the
    # 4-digit year detection below, and the SQL export, would not recognise).
    dt = (
        dt.assign(**{YEAR_COL: pd.to_numeric(dt[YEAR_COL], errors='coerce')})
        .dropna(subset=[YEAR_COL])
        .astype({YEAR_COL: int})
    )

    # groupby first/last skip NaN, so these are the first and last non-missing
    # values per country in chronological order.
    first_last = (
        dt.sort_values([ID_COL, YEAR_COL])
        .groupby(ID_COL)['val']
        .agg(['first', 'last'])
        .reset_index()
        .rename(columns={"first": "Earliest", "last": "MostRecent"})
    )

    wide = dt.pivot_table(index=ID_COL, columns=YEAR_COL, values='val').reset_index()
    year_cols = sorted(col for col in wide.columns if col != ID_COL)
    wide = wide[[ID_COL] + year_cols]

    wide = wide.merge(first_last, on=ID_COL, how='left')

    # Left-join onto the country list so every listed country gets a row (NaN
    # when WDI has no data); the WDI country name itself is not exported.
    block = country.merge(wide, on=ID_COL, how='left').drop(columns=ID_COL)
    block['Variable'] = v

    vlist.append(block)

final_df = pd.concat(vlist, ignore_index=True)

# Final column layout: country columns, years in ascending order, summary columns.
year_cols = sorted(
    (col for col in final_df.columns if re.match(r"^\d{4}$", str(col))),
    key=int,
)
meta_cols = ['Earliest', 'MostRecent', 'Variable']
other_cols = [col for col in final_df.columns if col not in year_cols + meta_cols]

final_df = final_df[other_cols + year_cols + meta_cols]

final_df.to_csv('IFsImportwdi.csv', index=False)

In [5]:
# Export IFsImportwdi.csv into the IFs import database: one table per DataDict
# row (dropped and recreated on every run), plus a copy of DataDict itself.
WDI_IMPORT_DB = r'C:\IFs\RUNFILES\IFsDataImport - Copy (5).db'

dd = pd.read_csv('DataDict.csv')
final_df = pd.read_csv("IFsImportwdi.csv")

# closing() guarantees the connection is released even when a statement fails,
# so the database file is not left locked by a half-finished run.
with closing(sqlite3.connect(WDI_IMPORT_DB)) as conn:
    cursor = conn.cursor()

    # Rows of IFsImportwdi.csv are labelled with DataDict's 'Variable' column, so
    # pair each table with that value directly instead of re-deriving it from
    # the table name.
    for tb, v in zip(dd['Table'], dd['Variable']):
        if pd.isnull(tb):
            continue  # no destination table named for this DataDict row

        print(f"Processing table for Variable: {v}")
        dt = final_df[final_df['Variable'] == v].drop(columns=['Variable'])

        cursor.execute(f"DROP TABLE IF EXISTS [{tb}];")

        # Assumes the first two CSV columns are Country and FIPS_CODE; every
        # remaining column (years, Earliest, MostRecent) is stored as a double.
        column_defs = ["Country VARCHAR(255)", "FIPS_CODE VARCHAR(255)"]
        column_defs += [f"[{c}] DOUBLE(53)" for c in dt.columns[2:]]
        cursor.execute(f"CREATE TABLE [{tb}] ({', '.join(column_defs)});")

        dt.to_sql(name=tb, con=conn, if_exists='append', index=False)
        conn.commit()

    dd.to_sql(name='DataDict', con=conn, if_exists='replace', index=False)
    conn.commit()


Processing table for Variable: ArmsImp%TotImp
Processing table for Variable: XDebtPNG%GDP
Processing table for Variable: XDebtPPG%GDP
Processing table for Variable: XFlowsIDA%GDP
Processing table for Variable: XFlowsIMFNonCon%GDP
Processing table for Variable: XIMFCredit%GDP
Processing table for Variable: XIncPayments%GDP
Processing table for Variable: XPortBonds%GDP
Processing table for Variable: XPortEquity%GDP
Processing table for Variable: XReserves%GDP
Processing table for Variable: XFlowsIMFCon%GDP
Processing table for Variable: XIncReceipts%GDP
Processing table for Variable: XFlowsIBRD%GDP
Processing table for Variable: XWBLoans%GDP
Processing table for Variable: GovtCurRev%GDP
Processing table for Variable: ExportServices%GDP
Processing table for Variable: ExportsMerchandise%GDP
Processing table for Variable: ImportServices%GDP
Processing table for Variable: ImportsMerchandise%GDP
Processing table for Variable: XWorkerRemitPaid%GDP
Processing table for Variable: XWorkerRemitRec

In [None]:
ifshist_path_old = r"C:\IFs\DATA\IFsHistSeries.db"
wdi_import_path = r"C:\IFs\RUNFILES\IFsDataImport - Copy (5).db"
KEY_COLS = ["Country", "FIPS_CODE"]


def year_of(col):
    """Return a column label as an int year, or None when it is not a whole number.

    Accepts labels such as '1960', 1960 or '1960.0'; labels like 'Country' or
    'Earliest' give None.
    """
    try:
        as_number = float(col)
    except (TypeError, ValueError):
        return None
    return int(as_number) if as_number.is_integer() else None


def first_and_last_valid(frame, ordered_cols):
    """Return (earliest, latest) lists: per row, the first and last non-missing
    value across ordered_cols, or NaN when the row has no data at all."""
    earliest, latest = [], []
    for _, row in frame[ordered_cols].iterrows():
        present = row.dropna()
        earliest.append(present.iloc[0] if not present.empty else np.nan)
        latest.append(present.iloc[-1] if not present.empty else np.nan)
    return earliest, latest


# Fill gaps in the freshly imported WDI tables with values from the previous IFs
# historical database, then recompute Earliest/MostRecent and write the result
# back to the WDI import database.
#
# A sqlite3 connection used as a context manager only commits or rolls back; it
# does not close. closing() releases both database files, while the trailing
# `conn_wdi` entry keeps the commit-on-success / rollback-on-error behaviour.
with closing(sqlite3.connect(wdi_import_path, timeout=30)) as conn_wdi, \
     closing(sqlite3.connect(ifshist_path_old, timeout=30)) as conn_ifs_old, \
     conn_wdi:

    dd_wdi = pd.read_sql_query("SELECT * FROM [DataDict]", conn_wdi)
    cursor_wdi = conn_wdi.cursor()

    for tb in dd_wdi["Table"].dropna():
        print(f"Processing table: {tb}")

        df_wdi = pd.read_sql_query(f"SELECT * FROM [{tb}]", conn_wdi)
        df_ifs = pd.read_sql_query(f"SELECT * FROM [{tb}]", conn_ifs_old)
        df_wdi = df_wdi.sort_values(by="Country").reset_index(drop=True)
        df_ifs = df_ifs.sort_values(by="Country").reset_index(drop=True)

        # Columns that exist only in the old IFs table are carried over. They
        # must also be added to the destination table, otherwise the append
        # below fails because the INSERT names a column the table lacks.
        ifs_only_cols = [c for c in df_ifs.columns if c not in df_wdi.columns]
        for c in ifs_only_cols:
            cursor_wdi.execute(f"ALTER TABLE [{tb}] ADD COLUMN [{c}] DOUBLE(53);")
            df_wdi[c] = np.nan

        df_wdi = df_wdi.set_index(KEY_COLS)
        df_ifs = df_ifs.set_index(KEY_COLS)

        # WDI values win; IFs values are used only where WDI is missing. Rows
        # present only in IFs are not added (assignment aligns to df_wdi's index).
        for col in df_wdi.columns:
            if col in df_ifs.columns:
                df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])

        df_wdi = df_wdi.reset_index()

        # Scan year columns in chronological order rather than frame order:
        # IFs-only columns are appended at the end of the frame, so a positional
        # scan could report a value from an old year as MostRecent.
        year_cols = sorted(
            (c for c in df_wdi.columns if year_of(c) is not None),
            key=year_of,
        )
        Ear, Rec = first_and_last_valid(df_wdi, year_cols)
        df_wdi["Earliest"] = Ear
        df_wdi["MostRecent"] = Rec

        cursor_wdi.execute(f"DELETE FROM [{tb}];")
        df_wdi.to_sql(name=tb, con=conn_wdi, if_exists="append", index=False)

    conn_wdi.commit()


Processing table: SeriesArmsImp%TotImp
Processing table: SeriesXDebtPNG%GDP
Processing table: SeriesXDebtPPG%GDP
Processing table: SeriesXFlowsIDA%GDP
Processing table: SeriesXFlowsIMFNonCon%GDP
Processing table: SeriesXIMFCredit%GDP
Processing table: SeriesXIncPayments%GDP
Processing table: SeriesXPortBonds%GDP
Processing table: SeriesXPortEquity%GDP
Processing table: SeriesXReserves%GDP
Processing table: SeriesXFlowsIMFCon%GDP
Processing table: SeriesXIncReceipts%GDP
Processing table: SeriesXFlowsIBRD%GDP
Processing table: SeriesXWBLoans%GDP
Processing table: SeriesGovtCurRev%GDP


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesExportServices%GDP
Processing table: SeriesExportsMerchandise%GDP
Processing table: SeriesImportServices%GDP
Processing table: SeriesImportsMerchandise%GDP
Processing table: SeriesXWorkerRemitPaid%GDP
Processing table: SeriesXWorkerRemitReceived%GDP
Processing table: SeriesXDebt%GDP
Processing table: SeriesNetForeignAssets%GDP
Processing table: SeriesLabor
Processing table: SeriesPopulationUrban


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesGDPCurDol
Processing table: SeriesXDebt


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])
  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesLaborFemale%
Processing table: SeriesLaborSecInd%Tot


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesLaborSecSer%Tot
Processing table: SeriesVaddInd%


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesOresMetsIm%MerchIm
Processing table: SeriesGovtDebt%GDP
Processing table: SeriesVaddMan%
Processing table: SeriesPopForeign%
Processing table: SeriesLaborAgriSector


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])
  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesGenderUnpaidDomesticWorkTimeFemale
Processing table: SeriesGenderUnpaidDomesticWorkTimeMale
Processing table: SeriesIncBelow2D15c%
Processing table: SeriesIncBelow3D65c%
Processing table: SeriesIncBelow6D85c%
Processing table: SeriesPovGap2D15cperDay
Processing table: SeriesPovGap3D65perDay
Processing table: SeriesPovGap6D85cperDay
Processing table: SeriesExportGoodSer%
Processing table: SeriesEnElecTransLoss%


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesGovtEdPub%GDP
Processing table: SeriesGovWBDoingBusinessCostofStarting


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesGovtMil%GDPWDI
Processing table: SeriesGovCon%GDP
Processing table: SeriesGovExpense%GDP
Processing table: SeriesImportGoodSer%
Processing table: SeriesEnElecConsPerCap


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesInvestGrCapForm%GDP
Processing table: SeriesXFDIInflows%GDP
Processing table: SeriesXFDIOutflows%GDP
Processing table: SeriesIncShareL20%
Processing table: SeriesLaborUnemploy%
Processing table: SeriesTaxImportDuties%Rev
Processing table: SeriesOresMetsEx%MerchEx
Processing table: SeriesPopContrUse%WDI
Processing table: SeriesEnElecAccess%National
Processing table: SeriesEnElecAccess%Rural
Processing table: SeriesEnElecAccess%Urban
Processing table: SeriesMalnChil%WeightWB


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesTaxGoodSer%CurRev
Processing table: SeriesTaxSocSec%CurRev
Processing table: SeriesGiniExtended
Processing table: SeriesICTInternet%Pop


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])
  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])
  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])
  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])
  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesLaborAgr%TotFemale
Processing table: SeriesVaddAg%
Processing table: SeriesVaddSer%
Processing table: SeriesLaborAgr%TotMale


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesXCurActBal%GDP
Processing table: SeriesMaternalMortalityRatio
Processing table: SeriesEnvPMWDI
Processing table: SeriesChildStuntingPercentWDI
Processing table: SeriesHealthMalarBedNetsWDI
Processing table: SeriesAidRecGrant%TotRev
Processing table: SeriesIncBelowNPL%WDI
Processing table: SeriesICTImport%Imp


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])
  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


Processing table: SeriesICTExport%Exp


  df_wdi[col] = df_wdi[col].combine_first(df_ifs[col])


In [37]:
# Manual cleanup: closes `conn`, the connection object created by the cell that
# exports IFsImportwdi.csv into SQLite. This cell depends on that cell having
# been run in the current kernel (otherwise `conn` is undefined).
# NOTE(review): the export cell closes its own connection when it completes, so
# this looks like a leftover used to release the database after an interrupted
# run - confirm whether it is still needed.
conn.close()
