In [1]:
import pandas as pd

In [2]:
# Load the section C file. convert_categoricals=False keeps the stored numeric
# codes instead of converting Stata value labels into pandas categoricals,
# which is why the cleaning below compares answer columns against 1.
df0 = pd.read_stata(
    "1213_sec_c.dta",
    convert_categoricals=False,
)

In [3]:
# Household code as text so it can be sliced and used as a join key.
# NOTE(review): the sample output further down shows values such as
# "3512000507.0", so the stored value appears to be a float and the text keeps
# a trailing ".0". The roster frame is converted the same way, so the keys
# still line up — confirm before changing either side.
df0["hhcode"] = df0["hhcode"].astype("str")

# Derive yes/no flags from the coded answers: only code 1 counts as "yes";
# every other value (including missing) becomes "no".
yes_no_labels = {True: "yes", False: "no"}
df0["ever_admitted"] = (df0["scq01"] == 1).map(yes_no_labels)
df0["currently_enrolled"] = (df0["scq05"] == 1).map(yes_no_labels)

In [4]:
# Keep only the identifiers, the derived yes/no flags and the location columns.
# .copy() makes the result independent of df0.
sec_c_columns = [
    "hhcode",
    "idc",
    "ever_admitted",
    "currently_enrolled",
    "region",
    "province",
]
df0_rel = df0.loc[:, sec_c_columns].copy()

In [5]:
# Load the roster file with pandas' default handling of Stata value labels:
# labelled columns come back as categoricals holding the label text, which the
# marital-status recoding below relies on.
df00 = pd.read_stata(filepath_or_buffer="1213_roster.dta")

In [6]:
# Household code as text, converted the same way as in the section C frame so
# the two frames can be joined on it.
df00["hhcode"] = df00["hhcode"].astype("str")

df00["sex"] = df00["sbq03"]

# Recode marital-status labels to numeric codes to align with the desired
# output. The result is assigned back to the frame instead of calling
# .replace(..., inplace=True) on a column selection: that chained in-place
# call only touches a temporary object under pandas Copy-on-Write, so the
# frame would silently keep the text labels.
marital_status_codes = {
    "unmarried": 1,
    "married": 2,
    "widow": 3,
    "divorced": 4,
    "nikkah": 5,
}
marital_status = df00["sbq06"]
if isinstance(marital_status.dtype, pd.CategoricalDtype):
    # Labelled Stata columns are read as categoricals; renaming the categories
    # is the supported way to relabel them. Categories that are not in the
    # mapping are passed through unchanged.
    marital_status = marital_status.cat.rename_categories(marital_status_codes)
else:
    # Plain (non-categorical) column: value-for-value replacement; values
    # that are not in the mapping are left as they are.
    marital_status = marital_status.replace(marital_status_codes)
df00["marital_status"] = marital_status

df00["subprovince"] = df00["district"]

In [7]:
# Keep only the join keys (hhcode, idc) and the roster attributes used later.
# .copy() makes the result independent of df00.
roster_columns = ["hhcode", "age", "idc", "sex", "marital_status", "subprovince"]
df00_rel = df00.loc[:, roster_columns].copy()

In [8]:
# Inner join of the two reduced frames: only rows whose (hhcode, idc) pair
# appears in both frames are kept.
years12_13 = df0_rel.merge(
    df00_rel,
    how="inner",
    on=["hhcode", "idc"],
)

In [9]:
# Keep individuals older than 4 and no older than 15 (ages 5-15 when age is a
# whole number).
# NOTE(review): the lower bound is exclusive, so age 4 itself is dropped —
# confirm that this is the intended range.
# .copy() makes the filtered rows an independent frame, so the column
# assignments below do not write into a slice of the merged frame
# (this avoids SettingWithCopyWarning).
age = years12_13["age"]
years12_13 = years12_13.loc[(age > 4) & (age <= 15)].copy()

# Label the region codes. replace() builds a new column, so text is never
# written in place into the numeric column; values other than 1 and 2 pass
# through unchanged.
years12_13["region"] = years12_13["region"].replace({1: "urban", 2: "rural"})

# The first four characters of the household code, stored as an integer.
years12_13["subprovince code"] = years12_13["hhcode"].str[:4].astype("int")

# Province codes -> province names; codes missing from the mapping become NaN.
province_names = {1: "K.P.K", 2: "Punjab", 3: "Sindh", 4: "Balochistan", 6: "Islamabad"}
years12_13["province"] = years12_13["province"].astype("int").map(province_names)

# First letter upper-case, everything else lower-case.
years12_13["subprovince"] = years12_13["subprovince"].str.capitalize()

# creating year column with lowest year as value
years12_13["year"] = 2012

In [10]:
# Spot-check ten rows. A fixed random_state makes the displayed sample the
# same on every Restart & Run All.
years12_13.sample(10, random_state=42)

Unnamed: 0,hhcode,idc,ever_admitted,currently_enrolled,region,province,age,sex,marital_status,subprovince,subprovince code,year
343782,3512000507.0,3,no,no,rural,Sindh,6,male,1,Ghotki,3512,2012
106278,2222001614.0,7,yes,yes,rural,Punjab,14,male,1,Layyah,2222,2012
280574,3182002008.0,6,no,no,rural,Sindh,8,male,1,Thatta,3182,2012
112903,2242000405.0,7,no,no,rural,Punjab,13,female,1,Rajanpur,2242,2012
273244,3161000107.0,4,no,no,urban,Sindh,14,male,1,Tando allah yar,3161,2012
69407,1712001513.0,9,no,no,rural,K.P.K,8,female,1,Charsada,1712,2012
65438,1622001106.0,5,no,yes,rural,K.P.K,5,female,1,Swabi,1622,2012
40636,1521000812.0,5,yes,yes,urban,K.P.K,12,male,1,Chitral,1521,2012
216684,2722001916.0,3,yes,yes,rural,Punjab,13,female,1,Chakwal,2722,2012
225821,2742001715.0,5,yes,yes,rural,Punjab,13,female,1,Rawalpindi,2742,2012


### Final Step: Save the Cleaned Data to CSV

In [11]:
# Write the cleaned frame to CSV. The row index is written as well (pandas'
# default), so the file starts with an unnamed index column.
years12_13.to_csv(path_or_buf="years12_13_f.csv", index=True)