# Census Data

This notebook imports 4 csv files from the Longitudinal Tract Data Base (LTDB) which contain census data for 262 census tracts in the Boston area. After a number of pre-processing steps, I calculate the percentage change between the two censuses with an end goal of using k-means clustering on the tracts.

In [1]:
import pandas as pd
# Show every row and every column when a DataFrame is displayed (no truncation).
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)

## File 1: 2000 Census, Long-form questionnaire

### Pre-processing

In [952]:
# Load the LTDB 2000 full-count file.
df_2000 = pd.read_csv(
    'CSV_files/LTDB_Std_2000_fullcount.csv',
    sep=',',
    engine='python',
)

# Keep only the rows whose state is Massachusetts.
df_2000_MA = df_2000.loc[df_2000['state'].eq('MA')]

In [953]:
# Tract names for Cambridge, Brookline and Somerville, formatted the way the
# 'tract' column of the full-count files is matched below ("Census Tract <number>").
_cambridge_numbers = [
    *range(3522, 3531),
    *range(3532, 3551),
    3521.01, 3521.02, 3531.02, 3531.01,
]
Cambridge_tracts = ['Census Tract ' + str(number) for number in _cambridge_numbers]

Brookline_tracts = ['Census Tract ' + str(number) for number in range(4001, 4013)]

_somerville_numbers = [
    3507, 3508, 3505, 3504, 3506, 3503, 3502, 3501.04, 3501.03,
    3514.03, 3512.03, 3512.04, 3513, 3511, 3510, 3509, 3515,
]
Somerville_tracts = ['Census Tract ' + str(number) for number in _somerville_numbers]

In [954]:
# Suffolk County: every tract in the county.
df_2000_MA_1 = df_2000_MA.loc[df_2000_MA['county'].eq('Suffolk County')]

# Cambridge, Brookline and Somerville: rows whose tract name contains one of
# the listed names.
# NOTE(review): this is a substring test over all of Massachusetts, not an exact
# match within one county — confirm it cannot pick up unintended tracts.
_in_cambridge = df_2000_MA['tract'].apply(
    lambda name: any(wanted in name for wanted in Cambridge_tracts))
df_2000_MA_2 = df_2000_MA[_in_cambridge]

_in_brookline = df_2000_MA['tract'].apply(
    lambda name: any(wanted in name for wanted in Brookline_tracts))
df_2000_MA_3 = df_2000_MA[_in_brookline]

_in_somerville = df_2000_MA['tract'].apply(
    lambda name: any(wanted in name for wanted in Somerville_tracts))
df_2000_MA_4 = df_2000_MA[_in_somerville]

In [955]:
# Row/column counts of the four subsets: Suffolk, Cambridge, Brookline, Somerville.
for _subset in (df_2000_MA_1, df_2000_MA_2, df_2000_MA_3, df_2000_MA_4):
    print(_subset.shape)

(203, 51)
(32, 51)
(12, 51)
(17, 51)


In [956]:
# Stack the four subsets (Suffolk, Cambridge, Brookline, Somerville) into one
# frame, drop the geography-code columns that are not needed, and renumber
# the rows from 0.
df_MA_full_2000 = (
    pd.concat(
        [df_2000_MA_1, df_2000_MA_2, df_2000_MA_3, df_2000_MA_4],
        ignore_index=True,
    )
    .drop(columns=['placefp10', 'cbsa10', 'metdiv10', 'ccflag10'])
    .reset_index(drop=True)
)

In [957]:
# Give the tract id column the same name the 2010 file uses, so the two can be merged on it.
df_MA_full_2000 = df_MA_full_2000.rename(columns={'TRTID10': 'tractid'})

### Feature engineering

In [958]:
# Turn raw counts into percentages.
# Each entry is (new column, count column, denominator column); the order is
# kept because later cells select columns by position.
_percent_specs_2000 = [
    ('percent_white00', 'NHWHT00', 'POP00'),
    ('percent_black00', 'NHBLK00', 'POP00'),
    ('percent_asian00', 'ASIAN00', 'POP00'),
    ('percent_hispanic00', 'HISP00', 'POP00'),
    ('percent_indian00', 'INDIA00', 'POP00'),
    ('percent_chinese00', 'CHINA00', 'POP00'),
    ('percent_filip00', 'FILIP00', 'POP00'),
    ('percent_japan00', 'JAPAN00', 'POP00'),
    ('percent_korean00', 'KOREA00', 'POP00'),
    ('percent_viet00', 'VIET00', 'POP00'),
    ('percent_mex00', 'MEX00', 'POP00'),
    ('percent_pr00', 'PR00', 'POP00'),
    ('percent_cuban00', 'CUBAN00', 'POP00'),
    ('percent_vacant_housing00', 'VAC00', 'HU00'),
    ('percent_occupied_housing00', 'OHU00', 'HU00'),
    ('percent_under18_00', 'A18UND00', 'POP00'),
    ('percent_60andup_00', 'A60UP00', 'POP00'),
    ('percent_75andup_00', 'A75UP00', 'POP00'),
    ('percent_owneroccupied_00', 'OWN00', 'HU00'),
    ('percent_renteroccupied_00', 'RENT00', 'HU00'),
]
for _new_col, _count_col, _base_col in _percent_specs_2000:
    df_MA_full_2000[_new_col] = 100 * df_MA_full_2000[_count_col] / df_MA_full_2000[_base_col]

In [959]:
# Collapse the race buckets into a single "percent non-white" figure
# (its counterpart is percent_white00).
# NOTE(review): the counts are picked by position (columns 6..19); presumably
# these are the non-white group counts — confirm the column order and that
# sub-groups are not counted twice.
_non_white_count_2000 = df_MA_full_2000.iloc[:, 6:20].sum(axis=1)
df_MA_full_2000['percent_non-white00'] = 100 * (_non_white_count_2000 / df_MA_full_2000['POP00'])

In [960]:
# Drop the columns at positions 5..46, keeping the first five columns and
# everything from position 47 onwards.
_raw_columns_2000 = df_MA_full_2000.columns[5:47]
df_MA_full_2000 = df_MA_full_2000.drop(columns=_raw_columns_2000)

In [961]:
# Write the 2000 features to disk for the classification step.
# The row index is written too (pandas default), as an unnamed first column.
df_MA_full_2000.to_csv(path_or_buf='Final_2000_data.csv')

## File 2: 2010 Census, Long-form questionnaire

### Pre-processing

In [962]:
# Load the LTDB 2010 full-count file.
df_2010 = pd.read_csv(
    'CSV_files/LTDB_Std_2010_fullcount.csv',
    sep=',',
    engine='python',
)

In [963]:
# Keep only the rows whose state is Massachusetts.
df_2010_MA = df_2010.loc[df_2010['state'].eq('MA')]

In [964]:
# Same geographic subsetting as for the 2000 file.

# Suffolk County: every tract in the county.
df_2010_MA_1 = df_2010_MA.loc[df_2010_MA['county'].eq('Suffolk County')]

# Cambridge, Brookline and Somerville: rows whose tract name contains one of
# the listed names.
# NOTE(review): this is a substring test over all of Massachusetts, not an exact
# match within one county — confirm it cannot pick up unintended tracts.
_in_cambridge_2010 = df_2010_MA['tract'].apply(
    lambda name: any(wanted in name for wanted in Cambridge_tracts))
df_2010_MA_2 = df_2010_MA[_in_cambridge_2010]

_in_brookline_2010 = df_2010_MA['tract'].apply(
    lambda name: any(wanted in name for wanted in Brookline_tracts))
df_2010_MA_3 = df_2010_MA[_in_brookline_2010]

_in_somerville_2010 = df_2010_MA['tract'].apply(
    lambda name: any(wanted in name for wanted in Somerville_tracts))
df_2010_MA_4 = df_2010_MA[_in_somerville_2010]

In [965]:
# Row/column counts of the four subsets: Suffolk, Cambridge, Brookline, Somerville.
for _subset in (df_2010_MA_1, df_2010_MA_2, df_2010_MA_3, df_2010_MA_4):
    print(_subset.shape)

(204, 47)
(32, 47)
(12, 47)
(17, 47)


In [966]:
# Stack the four subsets (Suffolk, Cambridge, Brookline, Somerville) into one
# frame and renumber the rows from 0.
df_MA_full_2010 = pd.concat(
    [df_2010_MA_1, df_2010_MA_2, df_2010_MA_3, df_2010_MA_4],
    ignore_index=True,
).reset_index(drop=True)

### Feature engineering

In [967]:
# Turn raw counts into percentages.
# Each entry is (new column, count column, denominator column); the order is
# kept because later cells select columns by position.
_percent_specs_2010 = [
    ('percent_white10', 'nhwht10', 'pop10'),
    ('percent_black10', 'nhblk10', 'pop10'),
    ('percent_asian10', 'asian10', 'pop10'),
    ('percent_hispanic10', 'hisp10', 'pop10'),
    ('percent_indian10', 'india10', 'pop10'),
    ('percent_chinese10', 'china10', 'pop10'),
    ('percent_filip10', 'filip10', 'pop10'),
    ('percent_japan10', 'japan10', 'pop10'),
    ('percent_korean10', 'korea10', 'pop10'),
    ('percent_viet10', 'viet10', 'pop10'),
    ('percent_mex10', 'mex10', 'pop10'),
    ('percent_pr10', 'pr10', 'pop10'),
    ('percent_cuban10', 'cuban10', 'pop10'),
    ('percent_vacant_housing10', 'vac10', 'hu10'),
    ('percent_occupied_housing10', 'ohu10', 'hu10'),
    ('percent_under18_10', 'a18und10', 'pop10'),
    ('percent_60andup_10', 'a60up10', 'pop10'),
    ('percent_75andup_10', 'a75up10', 'pop10'),
    ('percent_owneroccupied_10', 'own10', 'hu10'),
    ('percent_renteroccupied_10', 'rent10', 'hu10'),
]
for _new_col, _count_col, _base_col in _percent_specs_2010:
    df_MA_full_2010[_new_col] = 100 * df_MA_full_2010[_count_col] / df_MA_full_2010[_base_col]

In [968]:
# Collapse the race buckets into a single "non-white" percentage
# (its counterpart is percent_white10).
# NOTE(review): the counts are picked by position (columns 6..19); presumably
# these are the non-white group counts — confirm the column order and that
# sub-groups are not counted twice.
_non_white_count_2010 = df_MA_full_2010.iloc[:, 6:20].sum(axis=1)
df_MA_full_2010['non-white10'] = 100 * (_non_white_count_2010 / df_MA_full_2010['pop10'])

In [969]:
# Drop the columns at positions 5..46, keeping the first five columns and
# everything from position 47 onwards.
_raw_columns_2010 = df_MA_full_2010.columns[5:47]
df_MA_full_2010 = df_MA_full_2010.drop(columns=_raw_columns_2010)

In [970]:
# Write the 2010 features to disk for the prediction step.
# The row index is written too (pandas default), as an unnamed first column.
df_MA_full_2010.to_csv(path_or_buf='Final_2010_data.csv')

## Percentage change 2000-2010, Long-form questionnaire

In [941]:
# Put the 2000 and 2010 features side by side, one row per tract, so the
# change between the two censuses can be computed.
# This is an inner join: only tract ids present in both frames are kept.
df_joined = pd.merge(df_MA_full_2000, df_MA_full_2010, on='tractid')

In [942]:
# The merge duplicated the geography columns; keep the 2000 (_x) copies and
# discard the 2010 (_y) ones.
df_joined = df_joined.drop(columns=['state_y', 'county_y', 'tract_y'])

In [943]:
# Inspect the column order of the merged frame; the percentage-change cell
# below addresses the 2000 and 2010 columns by position.
df_joined.columns

Index(['tractid', 'state_x', 'county_x', 'tract_x', 'POP00', 'percent_white00',
       'percent_black00', 'percent_asian00', 'percent_hispanic00',
       'percent_indian00', 'percent_chinese00', 'percent_filip00',
       'percent_japan00', 'percent_korean00', 'percent_viet00',
       'percent_mex00', 'percent_pr00', 'percent_cuban00',
       'percent_vacant_housing00', 'percent_occupied_housing00',
       'percent_under18_00', 'percent_60andup_00', 'percent_75andup_00',
       'percent_owneroccupied_00', 'percent_renteroccupied_00',
       'percent_non-white00', 'pop10', 'percent_white10', 'percent_black10',
       'percent_asian10', 'percent_hispanic10', 'percent_indian10',
       'percent_chinese10', 'percent_filip10', 'percent_japan10',
       'percent_korean10', 'percent_viet10', 'percent_mex10', 'percent_pr10',
       'percent_cuban10', 'percent_vacant_housing10',
       'percent_occupied_housing10', 'percent_under18_10',
       'percent_60andup_10', 'percent_75andup_10', 'percent

In [944]:
# Percentage change from 2000 to 2010 for every matching pair of columns.
# Pairs are positional: the 2000 values start at column 4 (POP00) and the
# corresponding 2010 values start at column 26 (pop10), in the same order,
# ending at column 47.
_cols_2000 = list(df_joined.columns[4:26])
_cols_2010 = list(df_joined.columns[26:48])
# Fail loudly if the frame is narrower than expected instead of silently
# pairing fewer columns.
assert len(_cols_2000) == len(_cols_2010), 'unexpected column layout in df_joined'

for _col_2000, _col_2010 in zip(_cols_2000, _cols_2010):
    # Column-wise arithmetic replaces the former row-by-row apply(); the
    # formula and the resulting values are unchanged.
    # A zero 2000 value does not raise on numeric columns: the result is
    # inf (or NaN for 0/0). The previous `except ZeroDivisionError` branch was
    # therefore effectively dead and has been removed.
    df_joined[f'{_col_2000}_PC'] = 100 * (
        (df_joined[_col_2010] - df_joined[_col_2000]) / df_joined[_col_2000]
    )

  # Remove the CWD from sys.path while we load stuff.
  # Remove the CWD from sys.path while we load stuff.


In [945]:
# Save the merged full-count frame, including the *_PC change columns.
# The row index is written too (pandas default), as an unnamed first column.
df_joined.to_csv(path_or_buf='CSV_files/Full_count_census_changes.csv')

## File 3: 2000 Census, Short-form questionnaire

### Pre-processing

In [862]:
# Load the LTDB 2000 sample file.
df_2000_2 = pd.read_csv(
    'CSV_files/LTDB_Std_2000_Sample.csv',
    sep=',',
    engine='python',
)

# Keep only the Massachusetts rows.
# Note: this rebinds df_2000_MA_2, the name used earlier for the Cambridge
# subset of the 2000 full-count file.
df_2000_MA_2 = df_2000_2.loc[df_2000_2['state'].eq('MA')]

In [863]:
# Suffolk County: every tract in the county.
df_MA_sample_2000_1 = df_2000_MA_2.loc[df_2000_MA_2['county'].eq('Suffolk County')]

# Cambridge, Brookline and Somerville: rows whose tract name contains one of
# the listed names.
# NOTE(review): this is a substring test over all of Massachusetts, not an exact
# match within one county — confirm it cannot pick up unintended tracts.
_in_cambridge_s00 = df_2000_MA_2['tract'].apply(
    lambda name: any(wanted in name for wanted in Cambridge_tracts))
df_MA_sample_2000_2 = df_2000_MA_2[_in_cambridge_s00]

_in_brookline_s00 = df_2000_MA_2['tract'].apply(
    lambda name: any(wanted in name for wanted in Brookline_tracts))
df_MA_sample_2000_3 = df_2000_MA_2[_in_brookline_s00]

_in_somerville_s00 = df_2000_MA_2['tract'].apply(
    lambda name: any(wanted in name for wanted in Somerville_tracts))
df_MA_sample_2000_4 = df_2000_MA_2[_in_somerville_s00]

In [864]:
# Row/column counts of the four subsets: Suffolk, Cambridge, Brookline, Somerville.
for _subset in (df_MA_sample_2000_1, df_MA_sample_2000_2,
                df_MA_sample_2000_3, df_MA_sample_2000_4):
    print(_subset.shape)

(203, 77)
(32, 77)
(12, 77)
(17, 77)


In [865]:
# Stack the four subsets (Suffolk, Cambridge, Brookline, Somerville) into one
# frame, renumber the rows from 0, and drop the geography-code columns that
# are not needed.
df_MA_sample_2000 = (
    pd.concat(
        [df_MA_sample_2000_1, df_MA_sample_2000_2,
         df_MA_sample_2000_3, df_MA_sample_2000_4],
        ignore_index=True,
    )
    .reset_index(drop=True)
    .drop(columns=['placefp10', 'cbsa10', 'metdiv10', 'ccflag10'])
)

In [866]:
# Sanity check: (rows, columns) after combining the four subsets and dropping
# the geography-code columns.
df_MA_sample_2000.shape

(264, 73)

In [867]:
# Drop the state/county/tract columns.
df_MA_sample_2000 = df_MA_sample_2000.drop(columns=['state', 'county', 'tract'])

In [868]:
# Drop the ethnic-group columns, selected by position (columns 2..11).
_ethnic_columns = df_MA_sample_2000.columns[2:12]
df_MA_sample_2000 = df_MA_sample_2000.drop(columns=_ethnic_columns)

In [869]:
# These two variables are not variables in the 2010 file, so they cannot be compared.
df_MA_sample_2000 = df_MA_sample_2000.drop(columns=['DIS00', 'CNI16U00'])

### Feature engineering

In [870]:
# Turn raw counts into percentages.
# Each entry is (new column, count column, denominator column); the order is
# kept because later cells select columns by position.
_sample_percent_specs = [
    ('percent_foreign_born', 'FB00', 'POP00SF3'),
    ('percent_naturalized', 'NAT00', 'POP00SF3'),
    ('percent_recent_immigrants(10)', 'N10IMM00', 'POP00SF3'),
    ('percent_other_languages', 'OLANG00', 'AG5UP00'),
    ('percent_hs_degree_orless', 'HS00', 'AG25UP00'),
    ('percent_4yrcollege_degree_ormore', 'COL00', 'AG25UP00'),
    ('percent_married', 'Mar-00', 'AG15UP00'),
    ('percent_unemployed', 'UNEMP00', 'CLF00'),
    ('percent_employed', 'EMPCLF00', 'CLF00'),
    ('percent_professional', 'PROF00', 'EMPCLF00'),
    ('percent_manufacturing', 'MANUF00', 'EMPCLF00'),
    ('percent_self_employed', 'SEMP00', 'EMPCLF00'),
    ('percent_poverty', 'NPOV00', 'DPOV00'),
    ('percent_houses_30yrsago', 'H30OLD00', 'DMULTI00'),
    ('percent_multiunit_houses', 'MULTI00', 'DMULTI00'),
]
for _new_col, _count_col, _base_col in _sample_percent_specs:
    df_MA_sample_2000[_new_col] = 100 * df_MA_sample_2000[_count_col] / df_MA_sample_2000[_base_col]

In [871]:
# First of three positional drops that remove the remaining raw columns.
# After all three, the frame holds tractid, INCPC00, MHMVAL00, MRENT00 and the
# engineered percent_* columns.
_leftover_columns = df_MA_sample_2000.columns[1:39]
df_MA_sample_2000 = df_MA_sample_2000.drop(columns=_leftover_columns)

In [872]:
# Second positional drop: remove the columns at positions 2..12.
_leftover_columns = df_MA_sample_2000.columns[2:13]
df_MA_sample_2000 = df_MA_sample_2000.drop(columns=_leftover_columns)

In [873]:
# Third positional drop: remove the columns at positions 4..8.
_leftover_columns = df_MA_sample_2000.columns[4:9]
df_MA_sample_2000 = df_MA_sample_2000.drop(columns=_leftover_columns)

In [875]:
# Make these two columns numeric so arithmetic works on them; values that
# cannot be parsed become NaN (errors='coerce').
for _col_name in ('MHMVAL00', 'MRENT00'):
    df_MA_sample_2000[_col_name] = pd.to_numeric(df_MA_sample_2000[_col_name], errors='coerce')

In [876]:
# Give the tract id column the same name the 2010 file uses, so the two can be merged on it.
df_MA_sample_2000 = df_MA_sample_2000.rename(columns={'TRTID10': 'tractid'})

## File 4: 2010 Census, Short-form questionnaire

In [877]:
# Load the LTDB 2010 sample file.
df_2010_2 = pd.read_csv(
    'CSV_files/LTDB_Std_2010_Sample.csv',
    sep=',',
    engine='python',
)

# Keep only the Massachusetts rows; this file identifies the state by the
# numeric code in 'statea' (25) rather than by name.
df_2010_2_MA = df_2010_2.loc[df_2010_2['statea'].eq(25)]

In [878]:
# The 2010 sample data comes from the American Community Survey, where tracts
# are written as digit strings without a decimal point: whole-number tracts
# get a trailing "00" (3522 -> "352200") and suffixed tracts lose the dot
# (3521.01 -> "352101"). Build the lookup lists in that format.
Cambridge_tracts_ACS = [
    f'{number}00' for number in [*range(3522, 3531), *range(3532, 3551)]
] + ['352101', '352102', '353102', '353101']

Brookline_tracts_ACS = [f'{number}00' for number in range(4001, 4013)]

_somerville_numbers_acs = [
    3507, 3508, 3505, 3504, 3506, 3503, 3502, 3513, 3511, 3510, 3509, 3515,
    3501.04, 3501.03, 3514.03, 3512.03, 3512.04,
]
Somerville_tracts_ACS = [
    str(number).replace(".", "") if '.' in str(number) else str(number) + '00'
    for number in _somerville_numbers_acs
]

In [879]:
# Tract codes are matched as text in the next cell, so cast 'tracta' to str.
# df_2010_2_MA is a filtered slice of df_2010_2; assigning a column into it
# directly triggers pandas' SettingWithCopyWarning. Building a new frame with
# .assign() gives the same data without writing into the slice.
df_2010_2_MA = df_2010_2_MA.assign(tracta=df_2010_2_MA['tracta'].astype('str'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [880]:
# Suffolk County: every tract in the county (county code 25 in 'countya').
df_MA_sample_2010_1 = df_2010_2_MA.loc[df_2010_2_MA['countya'].eq(25)]

# Cambridge, Brookline and Somerville: rows whose tract code contains one of
# the listed ACS-format codes.
# NOTE(review): this is a substring test over all of Massachusetts, not an exact
# match within one county — confirm it cannot pick up unintended tracts.
_in_cambridge_s10 = df_2010_2_MA['tracta'].apply(
    lambda code: any(wanted in code for wanted in Cambridge_tracts_ACS))
df_MA_sample_2010_2 = df_2010_2_MA[_in_cambridge_s10]

_in_brookline_s10 = df_2010_2_MA['tracta'].apply(
    lambda code: any(wanted in code for wanted in Brookline_tracts_ACS))
df_MA_sample_2010_3 = df_2010_2_MA[_in_brookline_s10]

_in_somerville_s10 = df_2010_2_MA['tracta'].apply(
    lambda code: any(wanted in code for wanted in Somerville_tracts_ACS))
df_MA_sample_2010_4 = df_2010_2_MA[_in_somerville_s10]

In [881]:
# Row/column counts of the four subsets: Suffolk, Cambridge, Brookline, Somerville.
for _subset in (df_MA_sample_2010_1, df_MA_sample_2010_2,
                df_MA_sample_2010_3, df_MA_sample_2010_4):
    print(_subset.shape)

(204, 176)
(32, 176)
(12, 176)
(17, 176)


In [882]:
# Stack the four subsets (Suffolk, Cambridge, Brookline, Somerville) into one
# frame and renumber the rows from 0.
df_MA_sample_2010 = pd.concat(
    [df_MA_sample_2010_1, df_MA_sample_2010_2,
     df_MA_sample_2010_3, df_MA_sample_2010_4],
    ignore_index=True,
).reset_index(drop=True)

In [883]:
# Sanity check: (rows, columns) of the combined 2010 sample frame.
df_MA_sample_2010.shape

(265, 176)

### Feature engineering

In [884]:
# The one percentage used here that is missing from the 2010 file: employed
# share, computed as empclf12 / clf12 (the 2000 frame uses EMPCLF00 / CLF00).
_employed_share = df_MA_sample_2010['empclf12'] / df_MA_sample_2010['clf12']
df_MA_sample_2010['percent_employed12'] = 100 * _employed_share

In [885]:
# Keep only the columns that have a 2000 counterpart, in the same order as the
# 2000 sample frame, so the two can be paired by position after merging.
_sample_2010_columns = [
    'tractid', 'incpc12', 'mhmval12', 'mrent12',
    'pfb12', 'pnat12', 'p10imm12', 'polang12',
    'phs12', 'pcol12', 'pmar12',
    'punemp12', 'percent_employed12', 'pprof12', 'pmanuf12', 'psemp12',
    'ppov12', 'p30old12', 'pmulti12',
]
df_MA_sample_2010 = df_MA_sample_2010[_sample_2010_columns]

## Percentage change 2000-2010, short-form questionnaire

In [947]:
# Put the 2000 and 2010 sample features side by side, one row per tract, so
# the percentage change can be computed.
# This is an inner join: only tract ids present in both frames are kept.
df_joined_sample = pd.merge(df_MA_sample_2000, df_MA_sample_2010, on='tractid')

In [948]:
# Sanity check: (rows, columns) of the merged frame; the percentage-change
# cell below relies on this column count.
df_joined_sample.shape

(264, 37)

In [949]:
# Percentage change from 2000 to 2010 for every matching pair of columns.
# Pairs are positional: the 2000 values occupy columns 1..18 and the
# corresponding 2010 values occupy columns 19..36, in the same order.
_sample_cols_2000 = list(df_joined_sample.columns[1:19])
_sample_cols_2010 = list(df_joined_sample.columns[19:37])
# Fail loudly if the frame is narrower than expected instead of silently
# pairing fewer columns.
assert len(_sample_cols_2000) == len(_sample_cols_2010), 'unexpected column layout in df_joined_sample'

for _col_2000, _col_2010 in zip(_sample_cols_2000, _sample_cols_2010):
    # Column-wise arithmetic replaces the former row-by-row apply(); the
    # formula and the resulting values are unchanged.
    # A zero 2000 value yields inf (or NaN for 0/0) on numeric columns; it
    # does not raise.
    df_joined_sample[f'{_col_2000}_PC'] = 100 * (
        (df_joined_sample[_col_2010] - df_joined_sample[_col_2000])
        / df_joined_sample[_col_2000]
    )

  # Remove the CWD from sys.path while we load stuff.


In [950]:
# Preview the first rows: 2000 values, 2010 values, then the *_PC change columns.
df_joined_sample.head()

Unnamed: 0,tractid,INCPC00,MHMVAL00,MRENT00,percent_foreign_born,percent_naturalized,percent_recent_immigrants(10),percent_other_languages,percent_hs_degree_orless,percent_4yrcollege_degree_ormore,percent_married,percent_unemployed,percent_employed,percent_professional,percent_manufacturing,percent_self_employed,percent_poverty,percent_houses_30yrsago,percent_multiunit_houses,incpc12,mhmval12,mrent12,pfb12,pnat12,p10imm12,polang12,phs12,pcol12,pmar12,punemp12,percent_employed12,pprof12,pmanuf12,psemp12,ppov12,p30old12,pmulti12,INCPC00_PC,MHMVAL00_PC,MRENT00_PC,percent_foreign_born_PC,percent_naturalized_PC,percent_recent_immigrants(10)_PC,percent_other_languages_PC,percent_hs_degree_orless_PC,percent_4yrcollege_degree_ormore_PC,percent_married_PC,percent_unemployed_PC,percent_employed_PC,percent_professional_PC,percent_manufacturing_PC,percent_self_employed_PC,percent_poverty_PC,percent_houses_30yrsago_PC,percent_multiunit_houses_PC
0,25025000100,21999.0,222100.0,898.0,26.033266,11.265121,10.509073,34.381663,43.822674,40.988372,32.633698,4.170074,95.829926,41.894198,5.588737,11.348123,13.463986,81.412639,85.439901,27564,348200,1348,29.030001,15.3,9.48,38.450001,54.939999,30.780001,27.690001,6.06,94.380531,22.5,7.83,7.13,14.66,94.529999,78.639999,25.296604,56.776227,50.111359,11.511174,35.817451,-9.792235,11.83287,25.368886,-24.90553,-15.149056,45.321175,-1.512466,-46.293279,40.103205,-37.170225,8.883061,16.11219,-7.958695
1,25025000201,27675.0,265600.0,991.0,25.752508,11.757139,11.474145,26.682565,29.159148,47.347528,29.92059,1.925134,98.074866,49.109415,8.106143,6.288622,8.696774,92.630241,80.813215,38128,532000,1405,25.129999,14.55,7.07,29.030001,32.290001,53.32,27.059999,1.89,98.404032,45.75,5.98,8.02,10.13,95.980003,83.279999,37.770551,100.301205,41.775984,-2.417276,23.754597,-38.383205,8.797639,10.737119,12.614115,-9.560608,-1.825001,0.335627,-6.840674,-26.228789,27.531915,16.479972,3.616272,3.052451
2,25025000202,23602.0,276700.0,820.0,33.248408,15.312102,14.394904,36.802373,43.3161,38.810931,39.981813,2.715725,97.284275,47.996398,7.744259,5.718145,12.193878,96.503958,79.94723,29292,432800,1299,36.240002,17.66,12.23,41.509998,32.82,43.119999,33.09,9.6,91.285081,45.950001,1.62,8.85,18.200001,96.260002,78.57,24.108126,56.41489,58.414634,8.997706,15.33361,-15.039384,12.791636,-24.231407,11.102718,-17.237369,253.496788,-6.166663,-4.263648,-79.081279,54.770479,49.255236,-0.252793,-1.722674
3,25025000301,24920.0,214400.0,971.0,24.391105,12.213202,11.43664,21.726079,36.535797,44.618938,33.253205,1.617735,98.382265,53.349574,3.958587,7.003654,8.985399,86.30742,79.063604,41955,409700,1469,15.2,6.1,6.1,19.83,26.52,61.369999,34.970001,4.36,97.350649,63.82,5.92,7.74,8.99,93.970001,76.589996,68.358748,91.091418,51.287333,-37.6822,-50.054047,-46.662655,-8.727202,-27.413652,37.542492,5.162799,169.512601,-1.048579,19.626072,49.54831,10.513736,0.051206,8.878241,-3.12863
4,25025000302,35207.00128,259499.9961,1240.999993,25.428194,16.633729,7.312253,21.2938,25.886993,57.424443,33.655568,3.50213,96.49787,59.097599,9.612556,5.738107,5.566535,79.821962,81.379821,35219,322200,1436,34.490002,13.92,17.9,42.18,31.459999,56.759998,33.619999,3.02,97.389969,56.02,5.47,2.68,16.74,94.82,75.019997,0.03408,24.161852,15.713135,35.636849,-16.314614,144.794584,98.085829,21.528211,-1.157077,-0.105685,-13.766754,0.924476,-5.207653,-43.09526,-53.2947,200.725668,18.789363,-7.81499


In [951]:
# Save the merged sample frame, including the *_PC change columns.
# The row index is written too (pandas default), as an unnamed first column.
df_joined_sample.to_csv(path_or_buf='CSV_files/Sample_count_census_changes.csv')