In [1]:
# imports and dependencies
import pandas as pd
import numpy as np

In [2]:
# reading in data, and info
df = pd.read_csv('adult.csv')
print(df.head())
print(df.info())
print(df.describe())

   age  workclass  fnlwgt     education  educational-num      marital-status  \
0   25    Private  226802          11th                7       Never-married   
1   38    Private   89814       HS-grad                9  Married-civ-spouse   
2   28  Local-gov  336951    Assoc-acdm               12  Married-civ-spouse   
3   44    Private  160323  Some-college               10  Married-civ-spouse   
4   18          ?  103497  Some-college               10       Never-married   

          occupation relationship   race  gender  capital-gain  capital-loss  \
0  Machine-op-inspct    Own-child  Black    Male             0             0   
1    Farming-fishing      Husband  White    Male             0             0   
2    Protective-serv      Husband  White    Male             0             0   
3  Machine-op-inspct      Husband  Black    Male          7688             0   
4                  ?    Own-child  White  Female             0             0   

   hours-per-week native-country incom

In [3]:
# drop rows that are incomplete
for index, row in df.iterrows():
    if '?' in row.values:
        df.drop(index, inplace=True)
    
print(len(df))

45222


In [4]:
df.head()

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K
5,34,Private,198693,10th,6,Never-married,Other-service,Not-in-family,White,Male,0,0,30,United-States,<=50K


In [6]:
# 
column_names = df.columns
print(column_names)

Index(['age', 'workclass', 'fnlwgt', 'education', 'educational-num',
       'marital-status', 'occupation', 'relationship', 'race', 'gender',
       'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
       'income'],
      dtype='object')


In [7]:
# 2 renaming columns for database csv
new_column_names = {
    'age': 'Age',
    'workclass': 'Workclass',
    'fnlwgt': 'Fnlwgt',
    'education': 'Education',
    'educational-num': 'Educational-Grade',
    'marital-status': 'Marital-Status',
    'occupation': 'Occupation',
    'relationship': 'Relationship',
    'race': 'Race',
    'gender': 'Gender',
    'capital-gain': 'Capital-Gain',
    'capital-loss': 'Capital-Loss',
    'hours-per-week': 'Hours-Worked-Per-Week',
    'native-country': 'Native-Country',
    'income': 'Annual-Income' 
}

df = df.rename(columns=new_column_names)

In [4]:
# dropping unecisari columns for analysis
columns_to_drop = ['fnlwgt', 'educational-num', 'relationship', 'capital-gain', 'capital-loss']

df.drop(columns=columns_to_drop, inplace=True)
print(df.columns)

Index(['age', 'workclass', 'education', 'marital-status', 'occupation', 'race',
       'gender', 'hours-per-week', 'native-country', 'income'],
      dtype='object')


In [5]:
# Ages 18+ only
df = df[df['age'] >= 18]
print(len(df))

44729


In [6]:
# age counts
age_counts = df['age'].value_counts()
print(df['age'].min())
print(df['age'].max())
print(age_counts)

18
90
age
36    1283
33    1279
31    1274
35    1272
23    1241
      ... 
85       5
88       5
89       1
87       1
86       1
Name: count, Length: 73, dtype: int64


In [7]:
# Age bins
age_bins = [0, 19, 28, 38, 48, 58, 68, 78, np.inf]

age_labels = ['18-27', '28-37', '38-47', '48-57', '58-67', '68-77', '78-87', '88+']

df['age_group'] = pd.cut(df['age'], bins=age_bins, labels=age_labels, right=False)

age_dummies = pd.get_dummies(df['age_group']).astype(int)

df = pd.concat([df, age_dummies], axis=1)

df.drop(columns=['age_group'], inplace=True)

In [8]:
print(df.head)

<bound method NDFrame.head of        age     workclass     education      marital-status         occupation  \
0       25       Private          11th       Never-married  Machine-op-inspct   
1       38       Private       HS-grad  Married-civ-spouse    Farming-fishing   
2       28     Local-gov    Assoc-acdm  Married-civ-spouse    Protective-serv   
3       44       Private  Some-college  Married-civ-spouse  Machine-op-inspct   
5       34       Private          10th       Never-married      Other-service   
...    ...           ...           ...                 ...                ...   
48837   27       Private    Assoc-acdm  Married-civ-spouse       Tech-support   
48838   40       Private       HS-grad  Married-civ-spouse  Machine-op-inspct   
48839   58       Private       HS-grad             Widowed       Adm-clerical   
48840   22       Private       HS-grad       Never-married       Adm-clerical   
48841   52  Self-emp-inc       HS-grad  Married-civ-spouse    Exec-managerial  

In [9]:
# workclass count
workclass_counts = df['workclass'].value_counts()
print(workclass_counts)

workclass
Private             32856
Self-emp-not-inc     3787
Local-gov            3079
State-gov            1944
Self-emp-inc         1638
Federal-gov          1404
Without-pay            21
Name: count, dtype: int64


In [10]:
# workclass columns
one_hot_encoded = pd.get_dummies(df['workclass'])

one_hot_encoded = one_hot_encoded.astype(int)

df = pd.concat([df, one_hot_encoded], axis=1)

print(df.head)

<bound method NDFrame.head of        age     workclass     education      marital-status         occupation  \
0       25       Private          11th       Never-married  Machine-op-inspct   
1       38       Private       HS-grad  Married-civ-spouse    Farming-fishing   
2       28     Local-gov    Assoc-acdm  Married-civ-spouse    Protective-serv   
3       44       Private  Some-college  Married-civ-spouse  Machine-op-inspct   
5       34       Private          10th       Never-married      Other-service   
...    ...           ...           ...                 ...                ...   
48837   27       Private    Assoc-acdm  Married-civ-spouse       Tech-support   
48838   40       Private       HS-grad  Married-civ-spouse  Machine-op-inspct   
48839   58       Private       HS-grad             Widowed       Adm-clerical   
48840   22       Private       HS-grad       Never-married       Adm-clerical   
48841   52  Self-emp-inc       HS-grad  Married-civ-spouse    Exec-managerial  

In [11]:
# education count
education_counts = df['education'].value_counts()
print(education_counts)

education
HS-grad         14771
Some-college     9896
Bachelors        7570
Masters          2514
Assoc-voc        1959
Assoc-acdm       1507
11th             1387
10th             1073
7th-8th           819
Prof-school       785
9th               644
Doctorate         544
12th              520
5th-6th           446
1st-4th           222
Preschool          72
Name: count, dtype: int64


In [12]:
# education bins
edu_bins = {
    'Pre-school': ['Preschool'],
    'Elementary': ['1st-4th', '5th-6th'],
    'Middle': ['7th-8th'],
    'High School': ['9th', '10th', '11th', '12th', 'HS-grad'],
    'Some College': ['Some-college'],
    'Associate': ['Assoc-voc', 'Assoc-acdm'],
    'Bachelors': ['Bachelors'],
    'Masters': ['Masters'],
    'Doctorate': ['Doctorate']
}

for bin_name, bin_values in edu_bins.items():
    df[bin_name] = df['education'].isin(bin_values).astype(int)

print(df.head)

<bound method NDFrame.head of        age     workclass     education      marital-status         occupation  \
0       25       Private          11th       Never-married  Machine-op-inspct   
1       38       Private       HS-grad  Married-civ-spouse    Farming-fishing   
2       28     Local-gov    Assoc-acdm  Married-civ-spouse    Protective-serv   
3       44       Private  Some-college  Married-civ-spouse  Machine-op-inspct   
5       34       Private          10th       Never-married      Other-service   
...    ...           ...           ...                 ...                ...   
48837   27       Private    Assoc-acdm  Married-civ-spouse       Tech-support   
48838   40       Private       HS-grad  Married-civ-spouse  Machine-op-inspct   
48839   58       Private       HS-grad             Widowed       Adm-clerical   
48840   22       Private       HS-grad       Never-married       Adm-clerical   
48841   52  Self-emp-inc       HS-grad  Married-civ-spouse    Exec-managerial  

In [13]:
# marital-status count
marital_counts = df['marital-status'].value_counts()
print(marital_counts)

marital-status
Married-civ-spouse       21053
Never-married            14108
Divorced                  6297
Separated                 1411
Widowed                   1277
Married-spouse-absent      551
Married-AF-spouse           32
Name: count, dtype: int64


In [14]:
# marital-status columns
one_hot_encoded = pd.get_dummies(df['marital-status'])

one_hot_encoded = one_hot_encoded.astype(int)

df = pd.concat([df, one_hot_encoded], axis=1)

print(df.head)

<bound method NDFrame.head of        age     workclass     education      marital-status         occupation  \
0       25       Private          11th       Never-married  Machine-op-inspct   
1       38       Private       HS-grad  Married-civ-spouse    Farming-fishing   
2       28     Local-gov    Assoc-acdm  Married-civ-spouse    Protective-serv   
3       44       Private  Some-college  Married-civ-spouse  Machine-op-inspct   
5       34       Private          10th       Never-married      Other-service   
...    ...           ...           ...                 ...                ...   
48837   27       Private    Assoc-acdm  Married-civ-spouse       Tech-support   
48838   40       Private       HS-grad  Married-civ-spouse  Machine-op-inspct   
48839   58       Private       HS-grad             Widowed       Adm-clerical   
48840   22       Private       HS-grad       Never-married       Adm-clerical   
48841   52  Self-emp-inc       HS-grad  Married-civ-spouse    Exec-managerial  

In [15]:
# occupation count
occupation_counts = df['occupation'].value_counts()
print(occupation_counts)

occupation
Craft-repair         6002
Prof-specialty       5992
Exec-managerial      5981
Adm-clerical         5501
Sales                5272
Other-service        4624
Machine-op-inspct    2967
Transport-moving     2311
Handlers-cleaners    1989
Farming-fishing      1463
Tech-support         1419
Protective-serv       972
Priv-house-serv       222
Armed-Forces           14
Name: count, dtype: int64


In [16]:
# occupation columns
one_hot_encoded = pd.get_dummies(df['occupation'])

one_hot_encoded = one_hot_encoded.astype(int)

df = pd.concat([df, one_hot_encoded], axis=1)

print(df.head)

<bound method NDFrame.head of        age     workclass     education      marital-status         occupation  \
0       25       Private          11th       Never-married  Machine-op-inspct   
1       38       Private       HS-grad  Married-civ-spouse    Farming-fishing   
2       28     Local-gov    Assoc-acdm  Married-civ-spouse    Protective-serv   
3       44       Private  Some-college  Married-civ-spouse  Machine-op-inspct   
5       34       Private          10th       Never-married      Other-service   
...    ...           ...           ...                 ...                ...   
48837   27       Private    Assoc-acdm  Married-civ-spouse       Tech-support   
48838   40       Private       HS-grad  Married-civ-spouse  Machine-op-inspct   
48839   58       Private       HS-grad             Widowed       Adm-clerical   
48840   22       Private       HS-grad       Never-married       Adm-clerical   
48841   52  Self-emp-inc       HS-grad  Married-civ-spouse    Exec-managerial  

In [17]:
# race count
occupation_counts = df['race'].value_counts()
print(occupation_counts)

race
White                 38455
Black                  4195
Asian-Pac-Islander     1302
Amer-Indian-Eskimo      429
Other                   348
Name: count, dtype: int64


In [18]:
# race columns
one_hot_encoded = pd.get_dummies(df['race'])

one_hot_encoded = one_hot_encoded.astype(int)

df = pd.concat([df, one_hot_encoded], axis=1)

print(df.head)

<bound method NDFrame.head of        age     workclass     education      marital-status         occupation  \
0       25       Private          11th       Never-married  Machine-op-inspct   
1       38       Private       HS-grad  Married-civ-spouse    Farming-fishing   
2       28     Local-gov    Assoc-acdm  Married-civ-spouse    Protective-serv   
3       44       Private  Some-college  Married-civ-spouse  Machine-op-inspct   
5       34       Private          10th       Never-married      Other-service   
...    ...           ...           ...                 ...                ...   
48837   27       Private    Assoc-acdm  Married-civ-spouse       Tech-support   
48838   40       Private       HS-grad  Married-civ-spouse  Machine-op-inspct   
48839   58       Private       HS-grad             Widowed       Adm-clerical   
48840   22       Private       HS-grad       Never-married       Adm-clerical   
48841   52  Self-emp-inc       HS-grad  Married-civ-spouse    Exec-managerial  

In [19]:
# gender columns
one_hot_encoded = pd.get_dummies(df['gender'])

one_hot_encoded = one_hot_encoded.astype(int)

df = pd.concat([df, one_hot_encoded], axis=1)

print(df.head)

<bound method NDFrame.head of        age     workclass     education      marital-status         occupation  \
0       25       Private          11th       Never-married  Machine-op-inspct   
1       38       Private       HS-grad  Married-civ-spouse    Farming-fishing   
2       28     Local-gov    Assoc-acdm  Married-civ-spouse    Protective-serv   
3       44       Private  Some-college  Married-civ-spouse  Machine-op-inspct   
5       34       Private          10th       Never-married      Other-service   
...    ...           ...           ...                 ...                ...   
48837   27       Private    Assoc-acdm  Married-civ-spouse       Tech-support   
48838   40       Private       HS-grad  Married-civ-spouse  Machine-op-inspct   
48839   58       Private       HS-grad             Widowed       Adm-clerical   
48840   22       Private       HS-grad       Never-married       Adm-clerical   
48841   52  Self-emp-inc       HS-grad  Married-civ-spouse    Exec-managerial  

In [20]:
# hours count
hours_counts = df['hours-per-week'].value_counts()
print(df['hours-per-week'].min())
print(df['hours-per-week'].max())
print(hours_counts)

1
99
hours-per-week
40    21319
50     4093
45     2598
60     2085
35     1762
      ...  
79        1
69        1
87        1
94        1
82        1
Name: count, Length: 96, dtype: int64


In [21]:
# hour bins
hour_bins = [0, 34, 44, 60, np.inf]
hour_labels = ['1-34', '35-44', '45-60', '60+']

df['hours-per-week-bins'] = pd.cut(df['hours-per-week'], bins=hour_bins, labels=hour_labels, right=False)

hour_dummies = pd.get_dummies(df['hours-per-week-bins']).astype(int)

df = pd.concat([df, hour_dummies], axis=1)

df.drop(columns=['hours-per-week-bins'], inplace=True)

print(df.head)

<bound method NDFrame.head of        age     workclass     education      marital-status         occupation  \
0       25       Private          11th       Never-married  Machine-op-inspct   
1       38       Private       HS-grad  Married-civ-spouse    Farming-fishing   
2       28     Local-gov    Assoc-acdm  Married-civ-spouse    Protective-serv   
3       44       Private  Some-college  Married-civ-spouse  Machine-op-inspct   
5       34       Private          10th       Never-married      Other-service   
...    ...           ...           ...                 ...                ...   
48837   27       Private    Assoc-acdm  Married-civ-spouse       Tech-support   
48838   40       Private       HS-grad  Married-civ-spouse  Machine-op-inspct   
48839   58       Private       HS-grad             Widowed       Adm-clerical   
48840   22       Private       HS-grad       Never-married       Adm-clerical   
48841   52  Self-emp-inc       HS-grad  Married-civ-spouse    Exec-managerial  

In [22]:
# native-country count
country_counts = df['native-country'].value_counts()
print(country_counts)

native-country
United-States                 40821
Mexico                          896
Philippines                     282
Germany                         193
Puerto-Rico                     173
Canada                          161
India                           146
El-Salvador                     144
Cuba                            133
England                         118
China                           113
Jamaica                         103
South                           101
Italy                           100
Dominican-Republic               97
Japan                            89
Guatemala                        85
Vietnam                          83
Columbia                         81
Poland                           80
Haiti                            68
Portugal                         62
Iran                             56
Taiwan                           55
Greece                           49
Nicaragua                        48
Peru                             44
Ecuador      

In [23]:
# native-country bins
country_bins = ['United States', 'Other-Countries']
country_labels = ['United States', 'Other-Countries']

country_mapping = {'United-States': 'United States'}

df['country-bins'] = df['native-country'].map(country_mapping).fillna('Other-Countries')

country_dummies = pd.get_dummies(df['country-bins']).astype(int)

df = pd.concat([df, country_dummies], axis=1)

df.drop(columns=['native-country', 'country-bins'], inplace=True)

print(df.head())

   age  workclass     education      marital-status         occupation   race  \
0   25    Private          11th       Never-married  Machine-op-inspct  Black   
1   38    Private       HS-grad  Married-civ-spouse    Farming-fishing  White   
2   28  Local-gov    Assoc-acdm  Married-civ-spouse    Protective-serv  White   
3   44    Private  Some-college  Married-civ-spouse  Machine-op-inspct  Black   
5   34    Private          10th       Never-married      Other-service  White   

  gender  hours-per-week income  18-27  ...  Other  White  Female  Male  1-34  \
0   Male              40  <=50K      0  ...      0      0       0     1     0   
1   Male              50  <=50K      0  ...      0      1       0     1     0   
2   Male              40   >50K      0  ...      0      1       0     1     0   
3   Male              40   >50K      0  ...      0      0       0     1     0   
5   Male              30  <=50K      0  ...      0      1       0     1     1   

   35-44  45-60  60+  Othe

In [24]:
print(df.columns)

Index(['age', 'workclass', 'education', 'marital-status', 'occupation', 'race',
       'gender', 'hours-per-week', 'income', '18-27', '28-37', '38-47',
       '48-57', '58-67', '68-77', '78-87', '88+', 'Federal-gov', 'Local-gov',
       'Private', 'Self-emp-inc', 'Self-emp-not-inc', 'State-gov',
       'Without-pay', 'Pre-school', 'Elementary', 'Middle', 'High School',
       'Some College', 'Associate', 'Bachelors', 'Masters', 'Doctorate',
       'Divorced', 'Married-AF-spouse', 'Married-civ-spouse',
       'Married-spouse-absent', 'Never-married', 'Separated', 'Widowed',
       'Adm-clerical', 'Armed-Forces', 'Craft-repair', 'Exec-managerial',
       'Farming-fishing', 'Handlers-cleaners', 'Machine-op-inspct',
       'Other-service', 'Priv-house-serv', 'Prof-specialty', 'Protective-serv',
       'Sales', 'Tech-support', 'Transport-moving', 'Amer-Indian-Eskimo',
       'Asian-Pac-Islander', 'Black', 'Other', 'White', 'Female', 'Male',
       '1-34', '35-44', '45-60', '60+', 'Other-Cou

In [25]:
# removing columns, moving income column to the end
columns_to_remove = ['age', 'workclass', 'education', 'marital-status', 'occupation', 'race',
       'gender', 'hours-per-week']

df = df.drop(columns=columns_to_remove)

income_column = df.pop('income')
df['income'] = income_column 

print(df.columns)

Index(['18-27', '28-37', '38-47', '48-57', '58-67', '68-77', '78-87', '88+',
       'Federal-gov', 'Local-gov', 'Private', 'Self-emp-inc',
       'Self-emp-not-inc', 'State-gov', 'Without-pay', 'Pre-school',
       'Elementary', 'Middle', 'High School', 'Some College', 'Associate',
       'Bachelors', 'Masters', 'Doctorate', 'Divorced', 'Married-AF-spouse',
       'Married-civ-spouse', 'Married-spouse-absent', 'Never-married',
       'Separated', 'Widowed', 'Adm-clerical', 'Armed-Forces', 'Craft-repair',
       'Exec-managerial', 'Farming-fishing', 'Handlers-cleaners',
       'Machine-op-inspct', 'Other-service', 'Priv-house-serv',
       'Prof-specialty', 'Protective-serv', 'Sales', 'Tech-support',
       'Transport-moving', 'Amer-Indian-Eskimo', 'Asian-Pac-Islander', 'Black',
       'Other', 'White', 'Female', 'Male', '1-34', '35-44', '45-60', '60+',
       'Other-Countries', 'United States', 'income'],
      dtype='object')


In [26]:
df.head()

Unnamed: 0,18-27,28-37,38-47,48-57,58-67,68-77,78-87,88+,Federal-gov,Local-gov,...,White,Female,Male,1-34,35-44,45-60,60+,Other-Countries,United States,income
0,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,1,<=50K
1,0,0,0,1,0,0,0,0,0,0,...,1,0,1,0,0,1,0,0,1,<=50K
2,0,0,1,0,0,0,0,0,0,1,...,1,0,1,0,1,0,0,0,1,>50K
3,0,0,0,1,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,1,>50K
5,0,0,1,0,0,0,0,0,0,0,...,1,0,1,1,0,0,0,0,1,<=50K


In [27]:
# renaming columns
new_column_names = {
    'old_column_name1': 'new_column_name1',
    'old_column_name2': 'new_column_name2',
    '18-27': 'ages 18-27',
    '28-37': 'ages 28-37',
    '38-47': 'ages 38-47',
    '48-57': 'ages 48-57',
    '58-67': 'ages 58-67',
    '68-77': 'ages 68-77',
    '78-87': 'ages 78-87', 
    '88+': 'ages 88+',
    'Federal-gov': 'workclass Federal-gov', 
    'Local-gov': 'workclass Local-gov',
    'Private': 'workclass Private',
    'Self-emp-inc': 'workclass Self-emp-inc',
    'Self-emp-not-inc': 'workclass Self-emp-not-inc',
    'State-gov': 'workclass State-gov',
    'Without-pay': 'workclass Without-pay',
    'Pre-school': 'education Pre-school',
    'Elementary': 'education Elementary',
    'Middle': 'education Middle School',
    'High School': 'education High School', 
    'Some College': 'education Some College',
    'Associate': 'education Associate Degree',
    'Bachelors': 'education Bachelors',
    'Masters': 'education Masters',
    'Doctorate': 'education Doctorate',
    'Divorced': 'marital-status Divorced',
    'Married-AF-spouse': 'marital-status Married-AF-spouse',
    'Married-civ-spouse': 'marital-status Married-civ-spouse',
    'Married-spouse-absent': 'marital-status Married-spouse-absent',
    'Never-married': 'marital-status Never-married',
    'Separated': 'marital-status Separated',
    'Widowed': 'marital-status Widowed',
    'Adm-clerical': 'occupation Adm-clerical',
    'Armed-Forces': 'occupation Armed-Forces',
    'Craft-repair': 'occupation Craft-repair',
    'Exec-managerial': 'occupation Exec-managerial',
    'Farming-fishing': 'occupation Farming-fishing',
    'Handlers-cleaners': 'occupation Handlers-cleaners',
    'Machine-op-inspct': 'occupation Machine-op-inspct',
    'Other-service': 'occupation Other-services',
    'Priv-house-serv': 'occupation Priv-house-serv',
    'Prof-specialty': 'occupation Prof-specialty',
    'Protective-serv': 'occupation Protective-serv',
    'Sales': 'occupation Sales',
    'Tech-support': 'occupation Tech-support',
    'Transport-moving': 'occupation Transport-moving',
    'Amer-Indian-Eskimo': 'race Amer-Indian-Eskimo',
    'Asian-Pac-Islander': 'race Asian-Pac-Islander',
    'Black': 'race Black',
    'Other': 'race Other',
    'White': 'race White',
    'Female': 'gender Female',
    'Male': 'gender Male',
    '1-34': 'Hours-per-week 1-34',
    '35-44': 'Hours-per-week 35-44',
    '45-60': 'Hours-per-week 45-60',
    '60+': 'Hours-per-week 60+',
    'Other-Countries': 'Native Country Other-Countries',
    'United States': 'Native Country United States'
}

df = df.rename(columns=new_column_names)

In [28]:
df.head()

Unnamed: 0,ages 18-27,ages 28-37,ages 38-47,ages 48-57,ages 58-67,ages 68-77,ages 78-87,ages 88+,workclass Federal-gov,workclass Local-gov,...,race White,gender Female,gender Male,Hours-per-week 1-34,Hours-per-week 35-44,Hours-per-week 45-60,Hours-per-week 60+,Native Country Other-Countries,Native Country United States,income
0,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,1,<=50K
1,0,0,0,1,0,0,0,0,0,0,...,1,0,1,0,0,1,0,0,1,<=50K
2,0,0,1,0,0,0,0,0,0,1,...,1,0,1,0,1,0,0,0,1,>50K
3,0,0,0,1,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,1,>50K
5,0,0,1,0,0,0,0,0,0,0,...,1,0,1,1,0,0,0,0,1,<=50K


In [29]:
# export to csv
df.to_csv('Ready_csv.csv', index=False)