In [1]:
#Initial imports 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score, recall_score, roc_auc_score, plot_confusion_matrix
from sklearn.metrics import confusion_matrix, plot_roc_curve, precision_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier, BaggingClassifier, RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier, StackingRegressor, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, ComplementNB
import xgboost as xgb

# Show every column when a DataFrame is displayed (no truncation of wide frames).
pd.options.display.max_columns = None

In [53]:
# Load the survey file, derive a binary target, and build the feature frame.
# The path is relative to the notebook's working directory.
df = pd.read_sas('../../Data/nsch_2020_topical.sas7bdat')

# Recode K2Q35A into the target: 1.0 -> 1.0 and 2.0 -> 0.0. Any other value
# (including missing) has no entry in the mapping, becomes NaN, and the row is
# removed by the dropna below.
mask = {
    1.0: 1.0,
    2.0: 0.0
}
df['Target'] = df['K2Q35A'].map(mask)
# Reassign rather than mutate in place so the cell reads as a plain pipeline.
df = df.dropna(subset=['Target'])

y = df['Target']
# Remove the target, its source column, and related columns from the features.
# NOTE(review): presumably these extra columns are follow-ups to the target
# question and would leak the label -- confirm against the survey codebook.
X = df.drop(columns=['Target', 'K2Q35A', 'K2Q35A_1_YEARS', 'K2Q35B', 'K2Q35C', 'K2Q35D', 'AUTISMMED', 'AUTISMTREAT'])

# Features whose share of missing values reaches this fraction are discarded.
NULL_THRESHOLD = 0.5

# One vectorised pass: per-column missing count divided by the row count.
null_fraction = X.isna().sum() / len(X)
# Column order is preserved, so the list matches a left-to-right column scan.
drop_cols = null_fraction[null_fraction >= NULL_THRESHOLD].index.tolist()

X = X.drop(columns=drop_cols)

In [57]:
# List the columns removed from X because at least half of their values are missing.
drop_cols

['K6Q41R_STILL',
 'K6Q42R_NEVER',
 'K6Q43R_NEVER',
 'K6Q13A',
 'K6Q13B',
 'K6Q14A',
 'K6Q14B',
 'K4Q28X01',
 'K4Q28X02',
 'K4Q28X03',
 'K4Q28X_EAR',
 'K4Q28X04',
 'K4Q28X05',
 'SESPLANYR',
 'SESPLANMO',
 'K4Q37',
 'SPCSERVMO',
 'LIVEUSA_YR',
 'LIVEUSA_MO',
 'A1_LIVEUSA',
 'A2_LIVEUSA',
 'HANDS',
 'COORDINATION',
 'ERRANDALONE',
 'ALLERGIES_CURR',
 'ARTHRITIS_CURR',
 'K2Q40B',
 'K2Q61B',
 'K2Q41B',
 'K2Q42B',
 'HEART_BORN',
 'HEART_CURR',
 'HEADACHE_CURR',
 'K2Q38B',
 'K2Q33B',
 'K2Q32B',
 'SICKLECELL',
 'THALASSEMIA',
 'BLOOD_OTHER',
 'BLOOD_SCREEN',
 'CYSTFIB_SCREEN',
 'GENETIC_SCREEN',
 'K2Q34B',
 'K2Q36B',
 'K2Q60B',
 'K2Q37B',
 'K2Q30B',
 'K2Q31B',
 'K2Q31D',
 'ADDTREAT',
 'SEEKCARE',
 'CONFIRMINJURY',
 'K6Q40',
 'DOCPRIVATE',
 'K6Q10',
 'K6Q12',
 'NOTELIG',
 'AVAILABLE',
 'APPOINTMENT',
 'TRANSPORTCC',
 'NOTOPEN',
 'ISSUECOST',
 'SESCURRSVC',
 'K4Q38',
 'K5Q21',
 'TREATCHILD',
 'TREATADULT',
 'MEDHISTORY',
 'WRITEPLAN',
 'RECEIVECOPY',
 'HEALTHKNOW',
 'KEEPINSADULT',
 'K12Q01_A',


In [58]:
# Count rows per target class (0.0 / 1.0) to inspect class balance.
y.value_counts()

0.0    41343
1.0     1268
Name: Target, dtype: int64

With 1,268 autistic children in the sample, the minority class is likely large enough to undersample the majority class instead of oversampling. Undersampling is preferable here because many of the columns in our dataframe are binary.

In [59]:
# (rows, columns) of the feature frame after the sparse columns were dropped.
X.shape

(42611, 259)

In [61]:
# Print, for every column of df, how many values are missing and what share
# of all rows that represents.
total_rows = len(df)
missing_per_column = df.isna().sum()
for column_name, missing in missing_per_column.items():
    missing_share = missing / total_rows
    print(f'{column_name} null count: {missing}')
    print(f'{column_name} null percent: {missing_share}\n')

FIPSST null count: 0
FIPSST null percent: 0.0

STRATUM null count: 0
STRATUM null percent: 0.0

HHID null count: 0
HHID null percent: 0.0

FORMTYPE null count: 0
FORMTYPE null percent: 0.0

TOTKIDS_R null count: 0
TOTKIDS_R null percent: 0.0

TENURE null count: 0
TENURE null percent: 0.0

HHLANGUAGE null count: 151
HHLANGUAGE null percent: 0.0035436859027011803

SC_AGE_YEARS null count: 0
SC_AGE_YEARS null percent: 0.0

SC_SEX null count: 0
SC_SEX null percent: 0.0

K2Q35A_1_YEARS null count: 41453
K2Q35A_1_YEARS null percent: 0.9728239187064373

BIRTH_MO null count: 260
BIRTH_MO null percent: 0.0061017108258430925

BIRTH_YR null count: 379
BIRTH_YR null percent: 0.008894416934594354

MOMAGE null count: 907
MOMAGE null percent: 0.02128558353476802

K6Q41R_STILL null count: 32554
K6Q41R_STILL null percent: 0.763981131632677

K6Q42R_NEVER null count: 30586
K6Q42R_NEVER null percent: 0.717795874304757

K6Q43R_NEVER null count: 30633
K6Q43R_NEVER null percent: 0.718898875877121

K6Q13A nul

ONEWORD null count: 32064
ONEWORD null percent: 0.7524817535378189

TWOWORDS null count: 32064
TWOWORDS null percent: 0.7524817535378189

THREEWORDS null count: 32064
THREEWORDS null percent: 0.7524817535378189

ASKQUESTION null count: 32064
ASKQUESTION null percent: 0.7524817535378189

ASKQUESTION2 null count: 32064
ASKQUESTION2 null percent: 0.7524817535378189

TELLSTORY null count: 32064
TELLSTORY null percent: 0.7524817535378189

UNDERSTAND null count: 32064
UNDERSTAND null percent: 0.7524817535378189

DIRECTIONS null count: 32064
DIRECTIONS null percent: 0.7524817535378189

POINT null count: 32064
POINT null percent: 0.7524817535378189

DIRECTIONS2 null count: 32064
DIRECTIONS2 null percent: 0.7524817535378189

UNDERSTAND2 null count: 32064
UNDERSTAND2 null percent: 0.7524817535378189

RHYMEWORD null count: 35998
RHYMEWORD null percent: 0.8448053319565371

REPEATED null count: 12538
REPEATED null percent: 0.29424327051700266

K7Q30 null count: 12587
K7Q30 null percent: 0.295393208

K4Q02_R null percent: 0.1928140620966417

DENTISTVISIT null count: 8594
DENTISTVISIT null percent: 0.20168501091267513

K4Q22_R null count: 175
K4Q22_R null percent: 0.00410692074816362

TREATNEED null count: 36947
TREATNEED null percent: 0.8670765764708643

K4Q24_R null count: 277
K4Q24_R null percent: 0.006500668841378987

K4Q26 null count: 35679
K4Q26 null percent: 0.8373190021355988

C4Q04 null count: 199
C4Q04 null percent: 0.004670155593626059

HOSPITALER null count: 146
HOSPITALER null percent: 0.0034263453098965055

K4Q04_R null count: 204
K4Q04_R null percent: 0.004787496186430734

K5Q11 null count: 35236
K5Q11 null percent: 0.8269226256131046

K5Q20_R null count: 7044
K5Q20_R null percent: 0.16530942714322594

K5Q22 null count: 40884
K5Q22 null percent: 0.9594705592452653

K5Q30 null count: 22613
K5Q30 null percent: 0.5306845650184224

K5Q32 null count: 38983
K5Q32 null percent: 0.914857665860928

K5Q31_R null count: 6962
K5Q31_R null percent: 0.16338504142122925

K8Q21 null 

In [42]:
# Display the list of columns dropped for excessive missing values.
# NOTE(review): this cell's execution count (42) is lower than that of the
# surrounding cells and its saved output differs from the other drop_cols
# listing in this notebook -- re-run after Restart & Run All, or remove it.
drop_cols

['K6Q41R_STILL',
 'K6Q42R_NEVER',
 'K6Q43R_NEVER',
 'K6Q13A',
 'K6Q13B',
 'K6Q14A',
 'K6Q14B',
 'K4Q28X01',
 'K4Q28X02',
 'K4Q28X03',
 'K4Q28X_EAR',
 'K4Q28X04',
 'K4Q28X05',
 'SESPLANYR',
 'SESPLANMO',
 'K4Q37',
 'SPCSERVMO',
 'LIVEUSA_YR',
 'LIVEUSA_MO',
 'A1_LIVEUSA',
 'A2_LIVEUSA',
 'HANDS',
 'COORDINATION',
 'ALLERGIES_CURR',
 'ARTHRITIS_CURR',
 'K2Q40B',
 'K2Q61B',
 'K2Q41B',
 'K2Q42B',
 'HEART_BORN',
 'HEART_CURR',
 'HEADACHE_CURR',
 'K2Q38B',
 'K2Q33B',
 'K2Q32B',
 'SICKLECELL',
 'THALASSEMIA',
 'BLOOD_OTHER',
 'BLOOD_SCREEN',
 'CYSTFIB_SCREEN',
 'GENETIC_SCREEN',
 'K2Q34B',
 'K2Q36B',
 'K2Q60B',
 'K2Q37B',
 'K2Q30B',
 'K2Q31B',
 'K2Q31D',
 'ADDTREAT',
 'SEEKCARE',
 'CONFIRMINJURY',
 'K6Q40',
 'DOCPRIVATE',
 'K6Q10',
 'K6Q12',
 'NOTELIG',
 'AVAILABLE',
 'APPOINTMENT',
 'TRANSPORTCC',
 'NOTOPEN',
 'ISSUECOST',
 'SESCURRSVC',
 'K4Q38',
 'TREATADULT',
 'RECEIVECOPY',
 'KEEPINSADULT',
 'K12Q01_A',
 'K12Q01_B',
 'K12Q01_C',
 'K12Q01_D',
 'K12Q01_E',
 'K12Q01_F',
 'K12Q01_G',
 'ONEWO

In [62]:
# Preview the first five rows of the feature frame.
X.head()

Unnamed: 0,FIPSST,STRATUM,HHID,FORMTYPE,TOTKIDS_R,TENURE,HHLANGUAGE,SC_AGE_YEARS,SC_SEX,BIRTH_MO,BIRTH_YR,MOMAGE,K4Q32X01,K4Q32X02,K4Q32X03,K4Q32X04,K4Q32X05,DENTALSERV1,DENTALSERV2,DENTALSERV3,DENTALSERV4,DENTALSERV5,DENTALSERV6,DENTALSERV7,K11Q43R,A1_AGE,A2_AGE,HHCOUNT,FAMCOUNT,BREATHING,SWALLOWING,STOMACH,PHYSICALPAIN,TOOTHACHES,GUMBLEED,CAVITIES,MEMORYCOND,WALKSTAIRS,DRESSING,K2Q43B,BLINDNESS,ALLERGIES,ARTHRITIS,K2Q40A,K2Q61A,K2Q41A,K2Q42A,HEART,HEADACHE,K2Q38A,K2Q33A,K2Q32A,DOWNSYN,BLOOD,CYSTFIB,GENETIC,K2Q34A,K2Q36A,K2Q60A,K2Q37A,K2Q30A,K2Q31A,CONCUSSION,K2Q05,S4Q01,OVERWEIGHT,K4Q01,USUALGO,USUALSICK,K4Q31_R,K4Q23,ALTHEALTH,K4Q27,HOSPITALSTAY,K6Q15,K4Q36,K5Q10,DECISIONS,CURRCOV,K12Q03,K12Q04,K12Q12,TRICARE,K11Q03R,HCCOVOTH,K3Q25,STOPWORK,CUTHOURS,AVOIDCHG,REPEATED,K7Q30,K7Q31,K7Q32,K7Q37,K7Q38,BORNUSA,K8Q35,EMOSUPSPO,EMOSUPFAM,EMOSUPHCP,EMOSUPWOR,EMOSUPADV,EMOSUPPEER,EMOSUPMHP,EMOSUPOTH,K9Q40,K11Q60,K11Q61,K11Q62,S9Q34,K10Q11,K10Q12,K10Q13,K10Q14,K10Q20,K10Q22,K10Q23,K9Q96,ACE3,ACE4,ACE5,ACE6,ACE7,ACE8,ACE9,ACE10,ACE12,A1_SEX,A1_BORN,A1_EMPLOYED,A1_GRADE,A1_MARITAL,A1_RELATION,A2_SEX,A2_BORN,A2_EMPLOYED,A2_GRADE,A2_MARITAL,A2_RELATION,A1_ACTIVE,A2_ACTIVE,A1_PHYSHEALTH,A1_MENTHEALTH,A2_PHYSHEALTH,A2_MENTHEALTH,K2Q01,K2Q01_D,K5Q40,K5Q41,K5Q42,K5Q43,K5Q44,K3Q20,K3Q22,K3Q21B,K6Q71_R,K7Q84_R,K7Q85_R,K7Q82_R,K7Q83_R,K7Q70_R,BULLIED_R,BULLY,TALKABOUT,WKTOSOLVE,STRENGTHS,HOPEFUL,K10Q30,K10Q31,K10Q40_R,GOFORHELP,K10Q41_R,K8Q31,K8Q32,K8Q34,HOWMUCH,ATHOMEHC,ARRANGEHC,K7Q02R_R,K7Q04R_R,PHYSACTIV,HOURSLEEP,SCREENTIME,K8Q11,FOODSIT,HCABILITY,K4Q20R,DOCROOM,WGTCONC,K4Q02_R,DENTISTVISIT,K4Q22_R,K4Q24_R,C4Q04,HOSPITALER,K4Q04_R,K5Q20_R,K5Q31_R,K8Q21,K8Q30,K7Q33,BEDTIME,K3Q04_R,ACE1,MAKEFRIEND,K4Q30_R,MENBEVCOV,YEAR,METRO_YN,MPC_YN,TOTAGE_0_5,TOTAGE_6_11,TOTAGE_12_17,TOTCSHCN,TOTNONSHCN,SC_RACE_R,SC_HISPANIC_R,SC_CSHCN,SC_ENGLISH,SC_K2Q10,SC_K2Q13,SC_K2Q16,SC_K2Q19,SC_K2Q22,SC_AGE_LT4,SC_AGE_LT6,SC_AGE_LT9,SC_AGE_LT10,AGEPOS4,TENURE_IF,TOTMALE,TOTFEMALE,SC_RACE_R_IF,SC_RACER,SC
_HISPANIC_R_IF,SC_SEX_IF,BIRTHWT_OZ_S,HOUSE_GEN,FAMILY_R,CURRINS,INSGAP,INSTYPE,HIGRADE,HIGRADE_TVIS,BIRTHWT_VL,BIRTHWT_L,BIRTHWT,FPL_IF,A1_GRADE_IF,BIRTH_YR_F,BMICLASS,HHCOUNT_IF,FPL_I1,FPL_I2,FPL_I3,FPL_I4,FPL_I5,FPL_I6,FWC
0,b'17',b'1',b'20000003',b'T1',2.0,1.0,1.0,3.0,1.0,8.0,2017.0,26.0,,,,,,,,,,,,,1.0,29.0,33.0,4.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,,2.0,2.0,2.0,,,,,,,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,2.0,1.0,2.0,7.0,1.0,1.0,1.0,1.0,1.0,7.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,2.0,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,2.0,2.0,1.0,6.0,6.0,,,,,1.0,4.0,1.0,1.0,2.0,3.0,3.0,1.0,,3.0,3.0,1.0,1.0,3.0,3.0,3.0,,1.0,,1.0,1.0,1.0,1.0,3.0,5.0,2020.0,1.0,2.0,2.0,0.0,0.0,1.0,1.0,1.0,2.0,2.0,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,92.0,3.0,1.0,1.0,1.0,2.0,3.0,4.0,2.0,2.0,3.0,0.0,0.0,0.0,,0.0,400.0,400.0,400.0,400.0,400.0,400.0,3296.080092
1,b'29',b'2A',b'20000004',b'T3',1.0,1.0,1.0,14.0,2.0,2.0,2006.0,31.0,,,,,,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,43.0,,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,,,,,,,,,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,7.0,2.0,1.0,,,,,,8.0,1.0,,2.0,2.0,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,3.0,3.0,3.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,4.0,6.0,6.0,3.0,1.0,2.0,4.0,5.0,2.0,1.0,3.0,2.0,2.0,3.0,1.0,2.0,1.0,3.0,1.0,1.0,1.0,3.0,3.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,3.0,2020.0,1.0,2.0,0.0,0.0,1.0,1.0,0.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,112.0,3.0,6.0,1.0,1.0,2.0,3.0,4.0,2.0,2.0,3.0,0.0,0.0,0.0,2.0,0.0,400.0,400.0,400.0,400.0,400.0,400.0,2888.54533
2,b'47',b'1',b'20000005',b'T1',1.0,1.0,1.0,1.0,2.0,10.0,2018.0,28.0,,,,,,,,,,,,,0.0,30.0,43.0,3.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,,,,,,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,2.0,1.0,1.0,8.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,2.0,5.0,6.0,6.0,,,,,1.0,3.0,1.0,1.0,3.0,2.0,3.0,1.0,,3.0,1.0,1.0,1.0,1.0,2.0,3.0,,1.0,,2.0,1.0,1.0,,3.0,1.0,2020.0,1.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,91.0,3.0,1.0,1.0,1.0,2.0,3.0,4.0,2.0,2.0,3.0,0.0,0.0,0.0,,0.0,400.0,400.0,400.0,400.0,400.0,400.0,1016.68273
3,b'28',b'1',b'20000014',b'T3',2.0,1.0,1.0,15.0,2.0,10.0,2004.0,29.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,44.0,43.0,4.0,4.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,7.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,4.0,3.0,3.0,3.0,4.0,4.0,,,,,,1.0,1.0,4.0,2.0,2.0,2.0,1.0,1.0,4.0,2.0,1.0,3.0,3.0,2.0,2.0,2.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,3.0,6.0,6.0,1.0,1.0,2.0,4.0,5.0,4.0,2.0,2.0,,,3.0,1.0,3.0,3.0,3.0,2.0,1.0,1.0,,,1.0,2.0,1.0,3.0,1.0,2.0,1.0,1.0,1.0,2020.0,2.0,,0.0,0.0,2.0,0.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,101.0,3.0,1.0,1.0,1.0,2.0,3.0,4.0,2.0,2.0,3.0,0.0,0.0,0.0,2.0,0.0,143.0,143.0,143.0,143.0,143.0,143.0,1042.091065
4,b'55',b'1',b'20000015',b'T3',2.0,2.0,1.0,16.0,2.0,8.0,2004.0,24.0,1.0,2.0,2.0,2.0,2.0,,,,,,,,2.0,42.0,42.0,4.0,4.0,2.0,2.0,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,7.0,1.0,1.0,2.0,1.0,2.0,7.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,3.0,1.0,1.0,2.0,2.0,1.0,3.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,3.0,2.0,5.0,2.0,2.0,2.0,1.0,1.0,4.0,4.0,3.0,1.0,2.0,2.0,1.0,3.0,,1.0,1.0,3.0,1.0,2.0,2.0,1.0,3.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,2020.0,1.0,2.0,0.0,0.0,2.0,2.0,0.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,134.0,3.0,1.0,1.0,1.0,2.0,3.0,4.0,2.0,2.0,3.0,0.0,0.0,0.0,3.0,0.0,400.0,400.0,400.0,400.0,400.0,400.0,402.372392


In [66]:
# Summarise index, column count and dtypes for the first ten rows of X.
first_ten_rows = X.iloc[:10]
first_ten_rows.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 0 to 9
Columns: 259 entries, FIPSST to FWC
dtypes: float64(255), object(4)
memory usage: 20.3+ KB
