In [1]:
import pandas as pd
import os
import warnings
# Suppresses every Python warning from this point on (no category or module is
# given, so the filter matches all of them), including any raised by pandas.
# NOTE(review): consider narrowing this to specific categories so genuine
# problems in later cells stay visible.
warnings.filterwarnings('ignore')

In [2]:
# Read the Census 2011 workbook, restricted to the four columns that the
# preprocessing step needs.

census_2011 = "Census2011data.xlsx"
df_census = pd.read_excel(
    census_2011,
    usecols=["Level", "Name", "TRU", "TOT_P"],
)

In [3]:
# Census preprocessing: keep rows whose TRU is "Total" and whose Level is not
# "DISTRICT", reduce them to name + total population, and upper-case the names.
# Written as one expression so that no column is ever assigned into a filtered
# slice, which is the pattern pandas flags with SettingWithCopyWarning.
# NOTE(review): upper-casing presumably aligns these names with the C18 sheet's
# "Area Name" values for the later merge -- confirm against the data.
df_census = (
    df_census.loc[
        (df_census["TRU"] == "Total") & (df_census["Level"] != "DISTRICT"),
        ["Name", "TOT_P"],
    ]
    .assign(Name=lambda census: census["Name"].str.upper())
)

In [4]:
# Load the C18 language workbook.
# skiprows drops spreadsheet rows 0, 4 and 5 (0-based) while reading.
# NOTE(review): presumably those rows are title / sub-header lines of a
# multi-row header, which would also explain the truncated "Total/" column
# label -- confirm against the workbook.
c18 = "DDW-C18-0000.xlsx"

# Keep only the rows where both "Age-group" and "Total/" equal "Total", then
# reduce to the area name (renamed to "Name" so it can be merged with the
# census frame) and the two speaker counts.
df_c18 = (
    pd.read_excel(c18, skiprows=[0, 4, 5])
    .loc[
        lambda sheet: (sheet["Age-group"] == "Total") & (sheet["Total/"] == "Total"),
        [
            "Area Name",
            "Number speaking second language",
            "Number speaking third language",
        ],
    ]
    .rename(columns={"Area Name": "Name"})
)


In [5]:
# Join census population with the C18 speaker counts on "Name" (inner join),
# derive the speaker buckets and the two ratios, then drop the "INDIA" row.
#
# The derived columns are added by one assign() call; its entries are evaluated
# in order, so the ratio columns can read the bucket columns defined just
# above them.
#   Only one language       = total population - second-language speakers
#   exactly two languages   = second-language speakers - third-language speakers
#   Three languages or more = third-language speakers
# The row index produced by the merge is kept as-is after the INDIA row is
# removed; later cells read it.
df_merge = (
    pd.merge(df_census, df_c18, how="inner", on="Name")
    .assign(
        **{
            "Only one language": lambda m: m["TOT_P"]
            - m["Number speaking second language"],
            "exactly two languages": lambda m: m["Number speaking second language"]
            - m["Number speaking third language"],
            "Three languages or more": lambda m: m["Number speaking third language"],
            "3-to-2-ratio": lambda m: m["Three languages or more"].astype(float)
            / m["exactly two languages"].astype(float),
            "2-to-1-ratio": lambda m: m["exactly two languages"].astype(float)
            / m["Only one language"].astype(float),
        }
    )
    .loc[lambda m: m["Name"] != "INDIA"]
)

In [6]:
# Rank the rows of df_merge by each ratio, largest first.
# sort_values is chained onto the column selection so each result is a fresh,
# independent frame; sorting a selection in place is the pattern pandas flags
# with SettingWithCopyWarning.

# 3-to-2 ratio, descending
df_3_to_2 = df_merge[["Name", "3-to-2-ratio"]].sort_values(
    by="3-to-2-ratio", ascending=False
)
# 2-to-1 ratio, descending
df_2_to_1 = df_merge[["Name", "2-to-1-ratio"]].sort_values(
    by="2-to-1-ratio", ascending=False
)

In [7]:
# Preview the first three rows of the descending ranking, then the last three
# rows re-sorted so the smallest ratio is printed first.
print(df_3_to_2.head(3))
print(df_3_to_2.tail(3).sort_values(by="3-to-2-ratio"))

          Name  3-to-2-ratio
30         GOA      1.925807
3       PUNJAB      1.504216
4   CHANDIGARH      1.248266
             Name  3-to-2-ratio
10          BIHAR      0.111582
5     UTTARAKHAND      0.119637
9   UTTAR PRADESH      0.127448


In [8]:
# Reduce each ranking to its extremes: the first three rows, followed by the
# last three rows re-sorted in ascending order of the ratio.
df_3_to_2 = pd.concat(
    [
        df_3_to_2.head(3),
        df_3_to_2.tail(3).sort_values(by="3-to-2-ratio"),
    ]
)
df_2_to_1 = pd.concat(
    [
        df_2_to_1.head(3),
        df_2_to_1.tail(3).sort_values(by="2-to-1-ratio"),
    ]
)

In [9]:
# Show the combined top/bottom 3-to-2 ranking built in the previous cell.
print(df_3_to_2)

             Name  3-to-2-ratio
30            GOA      1.925807
3          PUNJAB      1.504216
4      CHANDIGARH      1.248266
10          BIHAR      0.111582
5     UTTARAKHAND      0.119637
9   UTTAR PRADESH      0.127448


In [10]:
# Move the row index into a leading column, discard "Name", and label the two
# remaining columns 'state/ut' and the ratio.
# NOTE(review): after this step 'state/ut' holds the former index values, not
# the names. The index appears to double as the state/UT code here -- confirm
# that this is intended and that the merge order guarantees it.
df_3_to_2 = df_3_to_2.reset_index().drop(columns=["Name"])
df_3_to_2.columns = ["state/ut", "3-to-2-ratio"]

df_2_to_1 = df_2_to_1.reset_index().drop(columns=["Name"])
df_2_to_1.columns = ["state/ut", "2-to-1-ratio"]


In [11]:
# Write the 3-to-2 ratio table to ./Output_Files/Q4/3-to-2-ratio.csv

outname_3_to_2 = '3-to-2-ratio.csv'
#outname_2_to_1 = '2-to-1-ratio.csv'
outfiles = './Output_Files'
outdir=outfiles+'/Q4'
# makedirs creates ./Output_Files and ./Output_Files/Q4 in one call; with
# exist_ok=True it does nothing when they are already there, so there is no
# gap between checking for the directory and creating it.
os.makedirs(outdir, exist_ok=True)
filepath1 = os.path.join(outdir, outname_3_to_2)
#filepath2 = os.path.join(outdir, outname_2_to_1)

# index=False: the pandas row index is not written as a CSV column.
df_3_to_2.to_csv(filepath1,index=False)
# NOTE(review): the 2-to-1 export is disabled; df_2_to_1 is built in earlier
# cells but never written -- confirm whether that is intentional.
#df_2_to_1.to_csv(filepath2,index=False)