# In [None]:
import numpy as np
import pandas as pd
import os
import csv
import time


class PrepareData():
    """Load the source CSV tables, clean each one and join them per country-year.

    The constructor reads the coup, political-instability, polity, leaders,
    urban-population, GDP-per-capita and GDP-growth tables into DataFrames
    and remembers the path of the diversity CSV.  ``joinTables`` produces the
    merged frame and ``saveFile`` writes it to disk.
    """

    # Directory that holds every input CSV.
    CSV_DIR = "../assets/csv"

    def __init__(self):
        """Read the input tables.

        Each file name is taken from its ``FILE_NAME_*`` environment variable
        when that variable is set and non-empty; otherwise the original
        hard-coded name is used.
        """
        def csvPath(envName, defaultName):
            fileName = os.environ.get(envName) or defaultName
            return "{}/{}".format(self.CSV_DIR, fileName)

        self.coup = pd.read_csv(csvPath("FILE_NAME_COUP", "Coups.csv"))
        self.stab = pd.read_csv(csvPath("FILE_NAME_STAB", "Politicalinstability2.csv"))
        self.polity = pd.read_csv(csvPath("FILE_NAME_PLTY", "polity.csv"))
        self.leaders = pd.read_csv(csvPath("FILE_NAME_LEAD", "leaders.csv"))
        self.urban = pd.read_csv(csvPath("FILE_NAME_URBP", "urbanpop.csv"))
        self.gdp = pd.read_csv(csvPath("FILE_NAME_GDPC", "RealpercapitaGDP.csv"))
        self.growth = pd.read_csv(csvPath("FILE_NAME_GROW", "GDPgrowth.csv"))
        # Only the path is stored; the file is parsed with the csv module
        # inside joinTables.
        self.diversity = csvPath("FILE_NAME_DIVY", "diversity.csv")

    def joinTables(self):
        """Clean every table, inner-join them on country/year and add diversity scores.

        Returns:
            DataFrame with one row per country-year present in *all* tables,
            plus ``ethnic``, ``linguistic`` and ``religious`` columns looked
            up by country.  Rows containing any missing value are dropped,
            and the ``year`` column is removed before returning.
        """
        keys = ["country", "year"]

        # Each cleaner takes the whole frame (DataFrame.apply would call it
        # once per column).  cleanCoup/cleanPolity modify their argument in
        # place, so they get copies: the raw tables stay intact and
        # joinTables can be called more than once.
        coup = self.cleanCoup(self.coup.copy())
        stab = self.cleanStab(self.stab)
        polity = self.cleanPolity(self.polity.copy())
        leaders = self.leaders
        urban = self.cleanUrban(self.urban)
        gdp = self.cleanGDP(self.gdp)
        growth = self.cleanGrowth(self.growth)

        political = (coup.merge(stab, how="inner", on=keys)
                         .merge(polity, how="inner", on=keys)
                         .merge(leaders, how="inner", on=keys))
        economic = (gdp.merge(growth, how="inner", on=keys)
                       .merge(urban, how="inner", on=keys))
        DF = economic.merge(political, how="inner", on=keys)

        # Direct dictionary lookup per row.  A country absent from the
        # diversity file gets None in all three columns (and is removed by
        # the dropna below) instead of leaving the lists shorter than DF.
        divDict = self._readDiversity()
        missing = [None, None, None]
        scores = [divDict.get(country, missing) for country in DF["country"]]

        # The scores are kept as the raw strings read from the CSV.
        DF["ethnic"] = [score[0] for score in scores]
        DF["linguistic"] = [score[1] for score in scores]
        DF["religious"] = [score[2] for score in scores]
        DF.dropna(axis=0, how="any", inplace=True)
        DF.drop(["year"], axis=1, inplace=True)
        return DF

    def _readDiversity(self):
        """Parse the diversity CSV into ``{country: [ethnic, linguistic, religious]}``.

        The first row is treated as a header and skipped; rows with fewer
        than four fields (e.g. blank lines) are ignored.
        """
        with open(self.diversity, "r") as f:
            rows = list(csv.reader(f.read().splitlines()))
        return {row[0]: row[1:4] for row in rows[1:] if len(row) >= 4}

    def _meltByYear(self, wide, valueName):
        """Reshape a wide table (first column = country, one column per year) to long form.

        Returns a frame with columns ``country``, ``year`` and *valueName*,
        sorted by country then year.
        """
        idCol = wide.columns[0]
        yearCols = wide.columns[1:].tolist()
        tidy = pd.melt(wide, id_vars=[idCol], value_vars=yearCols,
                       var_name="year", value_name=valueName)
        # The merges and the sort rely on the key being called "country".
        tidy = tidy.rename(columns={idCol: "country"})

        # melt yields the year labels as they appear in the header (strings
        # when read from CSV).  Convert them only when every label is
        # numeric, so they can match a numeric year column in another table.
        # NOTE(review): confirm the other tables store year as integers.
        years = pd.to_numeric(tidy["year"], errors="coerce")
        if years.notna().all():
            tidy["year"] = years.astype(int)

        tidy.sort_values(["country", "year"], inplace=True)
        return tidy

    def cleanCoup(self, coup):
        """Collapse ``scoup1`` and ``atcoup2`` into one binary ``coup`` column.

        Modifies *coup* in place (the two source columns are dropped) and
        returns it.
        """
        coup["sumcoup"] = coup["scoup1"] + coup["atcoup2"]
        coup["coup"] = coup["sumcoup"].apply(self.makeBinary)
        coup.drop(["scoup1", "atcoup2", "sumcoup"], axis=1, inplace=True)
        return coup

    def cleanStab(self, stab):
        """Aggregate the instability table to one row per country-year.

        ``PTYPE`` is one-hot encoded and, together with the five magnitude
        columns, reduced with ``max`` per (COUNTRY, YEAR).  Rows where any of
        the four ETH/REV magnitude columns equals 9 are removed (presumably
        9 is a missing-data code - confirm against the codebook).  Column
        names are lower-cased in the returned frame.
        """
        magnitudeCols = ["GENDEATHMAG", "ETHMAGFATAL", "ETHMAGFIGHT",
                         "REVMAGFATAL", "REVMAGFIGHT"]
        typeCols = ["NONE", "ETH", "REV", "GEN"]

        # Cast to int so the indicators are 0/1 whatever dtype this pandas
        # version gives get_dummies.
        dummies = pd.get_dummies(stab["PTYPE"]).astype(int)
        stab = stab[["COUNTRY", "YEAR"] + magnitudeCols].join(dummies)

        # NOTE(review): this renaming presumes PTYPE has exactly four
        # categories whose sorted order is none/ethnic/revolutionary/genocide
        # - verify against the data; any other count raises a length mismatch.
        stab.columns = ["COUNTRY", "YEAR"] + magnitudeCols + typeCols

        aggMap = {name: "max" for name in typeCols + magnitudeCols}
        stab1 = stab.groupby(["COUNTRY", "YEAR"]).agg(aggMap).reset_index()

        # Filter while the columns still carry their upper-case names.
        filtered = ["ETHMAGFATAL", "ETHMAGFIGHT", "REVMAGFATAL", "REVMAGFIGHT"]
        stab1 = stab1[(stab1[filtered] != 9).all(axis=1)]

        stab1.columns = [name.lower() for name in stab1.columns]
        return stab1

    def cleanPolity(self, polity):
        """Drop the polity columns that are not used; modifies *polity* in place and returns it."""
        polity.drop(["flag", "fragment", "democ", "autoc", "polity1"], axis=1, inplace=True)
        return polity

    def cleanUrban(self, urban):
        """Return the urban-population table in long form (``country``, ``year``, ``urbanpop``)."""
        return self._meltByYear(urban, "urbanpop")

    def cleanGDP(self, gdp):
        """Return GDP per capita in long form with ``gdppercap`` as a number.

        ``$`` signs and thousands separators are stripped first; anything
        that still is not numeric becomes NaN.
        """
        gdp1 = self._meltByYear(gdp, "gdppercap")
        # astype(str) lets already-numeric columns through the .str accessor;
        # regex=True with a character class removes "$" and "," literally.
        stripped = gdp1["gdppercap"].astype(str).str.replace(r"[$,]", "", regex=True)
        gdp1["gdppercap"] = pd.to_numeric(stripped, errors="coerce")
        return gdp1

    def cleanGrowth(self, growth):
        """Return GDP growth in long form with ``gdpgrowth`` as a number (non-numeric -> NaN)."""
        growth1 = self._meltByYear(growth, "gdpgrowth")
        growth1["gdpgrowth"] = pd.to_numeric(growth1["gdpgrowth"], errors="coerce")
        return growth1

    def makeBinary(self, coup):
        """Map a coup count to 1 (positive) or 0 (zero).

        Any other value (negative or NaN) yields None, so the row is treated
        as missing and removed by the dropna in joinTables.
        """
        if coup > 0:
            return 1
        if coup == 0:
            return 0
        return None

    def saveFile(self, DF):
        """Write *DF* as comma-separated text to ``../assets/FINALDATA`` without the index."""
        DF.to_csv("../assets/FINALDATA", sep=",", index=False)
        print("Successfully Saved Files to Path ../assets")


# Build the pipeline object; the constructor reads the source CSV files.
pipeLineObj =  PrepareData()
# Clean and join the tables. The returned DataFrame is not bound to a name
# here (as the last expression of a notebook cell it would only be displayed).
pipeLineObj.joinTables()


# In [None]:
class PrepareData():
    """Reduced variant of the pipeline that only loads and cleans the coup table.

    Being defined later in the file, this class rebinds the name
    ``PrepareData``.
    """

    def __init__(self):
        """Read the coup CSV.

        The file name comes from ``FILE_NAME_COUP`` when that variable is set
        and non-empty; otherwise ``Coups.csv`` is used.
        """
        fileNameCoup = os.environ.get("FILE_NAME_COUP") or "Coups.csv"
        self.coup = pd.read_csv("../assets/csv/{}".format(fileNameCoup))

    def cleanCoup(self, coup):
        """Collapse ``scoup1`` and ``atcoup2`` into one binary ``coup`` column.

        Modifies *coup* in place (the two source columns are dropped) and
        returns it.
        """
        coup["sumcoup"] = coup["scoup1"] + coup["atcoup2"]
        coup["coup"] = coup["sumcoup"].apply(self.makeBinary)
        coup.drop(["scoup1", "atcoup2", "sumcoup"], axis=1, inplace=True)
        return coup

    def makeNewCol(self, coup):
        """Placeholder: the original definition had no body.

        Raises:
            NotImplementedError: always, until the method is written.
        """
        raise NotImplementedError("makeNewCol is not implemented yet")

    def makeBinary(self, coup):
        """Map a coup count to 1 (positive) or 0 (zero); any other value yields None."""
        if coup > 0:
            return 1
        if coup == 0:
            return 0
        return None
