In [1]:
%matplotlib notebook
import pandas as pd
import numpy as np
from scipy import stats
import os
from ReadData import ReadFile
from OneYearData import OneYearData


In [2]:
#Adult=ReadFile(2014,'samadult')
#Adult.isnull().sum()

In [3]:
#Child=ReadFile(2014,'samchild')
#Child.isnull().sum()

In [4]:
def _flag_codes(values, codes):
    """Return an integer Series holding 1 where `values` is one of `codes`, else 0.

    Missing values never match any code, so they are flagged 0.
    """
    return values.isin(codes).astype(int)


def TransformData(year):
    """Build one family-level dataframe of household indicator flags for a survey year.

    Reads the 'familyxx', 'samadult' and 'samchild' files for `year` through
    ReadFile, derives 0/1 indicator columns from the coded survey variables,
    and inner-joins the three sources on the household/family key (HHX, FMX).
    Relies on `pd` and `ReadFile` from the notebook's import cell.

    Indicator rules (the meaning of each code comes from the survey codebook,
    which is not visible in this notebook):
      * 'HH with Food Concern due to Money' - FSRUNOUT or FSLAST is 1 or 2
      * 'HH with Children'                  - FM_TYPE is 3 or 4
      * 'HH with Smoker'                    - SMKSTAT2 is 1, 2 or 5
      * 'HH with Asthmatic Child'           - CASHMEV is 1
      * 'HH with ADHD/ADD Child'            - ADD2N is 1
      * 'HH with Autistic Child'            - AUTISMN is 1
      * 'HH with Child With Concentration/Emotional/Behavior Issues'
                                            - RSCL6 is 1, 2 or 3

    Parameters
    ----------
    year : int
        Survey year, forwarded unchanged to ReadFile.

    Returns
    -------
    pandas.DataFrame
        One row per (HHX, FMX) pair present in all three files, holding the
        keys, SRVY_YR, FM_SIZE, the indicator columns and the three weight
        columns (WTFA_SA, WTFA_SC, WTFA_FAM).

    Raises
    ------
    KeyError
        If one of the survey variables used below is absent from a file.
    """
    # A family is identified by household (HHX) plus family-within-household
    # (FMX). FMX on its own repeats across households, so every join must use
    # both columns.
    key_cols = ['HHX', 'FMX']

    ##############################
    ##         Family           ##
    ##############################

    # Pull the family file and keep only the variables needed for analysis
    family = ReadFile(year, 'familyxx')
    family = family.loc[:, ['SRVY_YR', 'HHX', 'FMX', 'FM_SIZE', 'FM_TYPE',
                            'FSRUNOUT', 'FSLAST', 'WTFA_FAM']].copy()

    # Either food-security variable being coded 1 or 2 marks the family
    family['HH with Food Concern due to Money'] = (
        family[['FSRUNOUT', 'FSLAST']].isin([1, 2]).any(axis=1).astype(int)
    )

    # Family-with-children indicator comes from the family type variable
    family['HH with Children'] = _flag_codes(family['FM_TYPE'], [3, 4])

    ##############################
    ##          Adult           ##
    ##############################

    # Pull the sample adult file; only the keys, weight and smoking status
    # are used downstream
    adult = ReadFile(year, 'samadult')
    adult = adult.loc[:, key_cols + ['WTFA_SA', 'SMKSTAT2']].copy()

    # Smoking status codes 1, 2 and 5 flag the adult as a smoker
    adult['HH with Smoker'] = _flag_codes(adult['SMKSTAT2'], [1, 2, 5])

    ##############################
    ##          Child           ##
    ##############################

    # Pull the sample child file; only the keys, weight and condition
    # variables are used downstream
    child = ReadFile(year, 'samchild')
    child = child.loc[:, key_cols + ['WTFA_SC', 'CASHMEV', 'ADD2N',
                                     'AUTISMN', 'RSCL6']].copy()

    # Each condition gets its own independent flag; a child may have several
    child['HH with Asthmatic Child'] = _flag_codes(child['CASHMEV'], [1])
    child['HH with ADHD/ADD Child'] = _flag_codes(child['ADD2N'], [1])
    child['HH with Autistic Child'] = _flag_codes(child['AUTISMN'], [1])
    child['HH with Child With Concentration/Emotional/Behavior Issues'] = (
        _flag_codes(child['RSCL6'], [1, 2, 3])
    )

    ##############################
    ##   Merge Adult/Child      ##
    ##############################

    # Trim both sides to keys + outputs before joining so that no shared,
    # non-key column picks up an _x/_y suffix
    adult_out = adult.loc[:, key_cols + ['HH with Smoker', 'WTFA_SA']]
    child_out = child.loc[:, key_cols + [
        'HH with Asthmatic Child',
        'HH with ADHD/ADD Child',
        'HH with Autistic Child',
        'HH with Child With Concentration/Emotional/Behavior Issues',
        'WTFA_SC']]
    merge_adult_child = pd.merge(adult_out, child_out, on=key_cols)

    ##############################
    ##   Merge Family info      ##
    ##############################
    family_merge = pd.merge(family, merge_adult_child, on=key_cols)
    family_merge = family_merge.loc[:, ['HHX', 'FMX', 'SRVY_YR', 'FM_SIZE', 'HH with Children',
                                        'HH with Smoker', 'HH with Asthmatic Child', 'HH with Food Concern due to Money',
                                        'HH with ADHD/ADD Child', 'HH with Autistic Child',
                                        'HH with Child With Concentration/Emotional/Behavior Issues',
                                        'WTFA_SA', 'WTFA_SC', 'WTFA_FAM']]

    numvars = len(family_merge.columns)
    numrecs = len(family_merge)
    print(f"Results:  Final Year {year} Dataframe for year created with {numrecs} rows and {numvars} columms.")

    return family_merge
    
#test = TransformData(2014)
#test.head()

In [5]:
# Build the 2017 one-year dataset with the imported OneYearData helper;
# the trailing .head() previews the first rows as the cell output.
# NOTE(review): this calls OneYearData rather than the TransformData function
# defined earlier in this notebook - confirm which pipeline is intended.
Data2017 = OneYearData(year=2017)
Data2017.head()


Reading in file familyxx for year 2017
Results:  Dataframe created with 33157 rows and 734 columms.
Reading in file samadult for year 2017
Results:  Dataframe created with 26742 rows and 2940 columms.
Reading in file samchild for year 2017
Results:  Dataframe created with 8845 rows and 988 columms.
Results:  adult/child Year 2017 Dataframe created with 6981 rows and 7 columms.
Results:  final Year 2017 Dataframe created with 6981 rows and 11 columms.
Dataframe output to ..\output_data\year_{year}.csv


Unnamed: 0,SRVY_YR,HHX,FMX,FM_SIZE,FM_TYPE,WTFA_FAM,HH with Smoker,HH with Asthmatic Child,HH with ADHD/ADD Child,HH with Autistic Child,HH with Child With Concentration/Emotional/Behavior Issues
0,2017,11,1,4,4,4070,1,0,0,0,0
1,2017,18,1,5,4,2853,1,0,0,0,0
2,2017,19,1,3,4,3454,0,0,0,0,1
3,2017,26,1,2,3,4633,0,1,0,0,0
4,2017,47,1,5,4,3862,0,0,0,0,0


In [6]:
# Build the 2016 one-year dataset with the imported OneYearData helper;
# the trailing .head() previews the first rows as the cell output.
# NOTE(review): the year is passed positionally here but by keyword
# (year=2017) in the previous cell - presumably equivalent; verify against
# the OneYearData signature.
Data2016 = OneYearData(2016)
Data2016.head()


Reading in file familyxx for year 2016
Results:  Dataframe created with 40875 rows and 734 columms.
Reading in file samadult for year 2016
Results:  Dataframe created with 33028 rows and 3338 columms.
Reading in file samchild for year 2016
Results:  Dataframe created with 11107 rows and 1084 columms.
Results:  adult/child Year 2016 Dataframe created with 8820 rows and 7 columms.
Results:  final Year 2016 Dataframe created with 8820 rows and 11 columms.
Dataframe output to ..\output_data\year_{year}.csv


Unnamed: 0,SRVY_YR,HHX,FMX,FM_SIZE,FM_TYPE,WTFA_FAM,HH with Smoker,HH with Asthmatic Child,HH with ADHD/ADD Child,HH with Autistic Child,HH with Child With Concentration/Emotional/Behavior Issues
0,2016,5,1,3,3,3337,1,1,0,0,1
1,2016,10,1,8,4,2318,0,0,0,0,0
2,2016,37,1,5,4,9545,0,0,0,0,0
3,2016,44,1,4,4,2155,0,0,0,0,0
4,2016,46,1,4,4,3306,0,0,0,0,0
