In [105]:
# Importing
import pandas as pd
import altair as alt
import numpy as np

from IPython.display import Markdown
from IPython.display import display
from tabulate import tabulate
pd.set_option('display.max_columns', 500)

In [106]:
# Load Data
df = pd.read_csv('StarWars.csv', encoding='latin1')

# Rename Columns
new_column_names = {
    'RespondentID': 'id',
    'Have you seen any of the 6 films in the Star Wars franchise?': 'seen_star_wars',
    'Do you consider yourself to be a fan of the Star Wars film franchise?': 'star_wars_fan',
    'Which of the following Star Wars films have you seen? Please select all that apply.': 'seen_film_1',
    'Unnamed: 4': 'seen_film_2',
    'Unnamed: 5': 'seen_film_3',
    'Unnamed: 6': 'seen_film_4',
    'Unnamed: 7': 'seen_film_5',
    'Unnamed: 8': 'seen_film_6',
    'Please rank the Star Wars films in order of preference with 1 being your favorite film in the franchise and 6 being your least favorite film.': 'film_1_preference',
    'Unnamed: 10': 'film_2_preference',
    'Unnamed: 11': 'film_3_preference',
    'Unnamed: 12': 'film_4_preference',
    'Unnamed: 13': 'film_5_preference',
    'Unnamed: 14': 'film_6_preference',
    'Please state whether you view the following characters favorably, unfavorably, or are unfamiliar with him/her.': 'han_solo_view',
    'Unnamed: 16': 'luke_skywalker_view',
    'Unnamed: 17': 'princess_leia_view',
    'Unnamed: 18': 'anakin_skywalker_view',
    'Unnamed: 19': 'obi_wan_kenobi_view',
    'Unnamed: 20': 'emperor_palpatine_view',
    'Unnamed: 21': 'darth_vader_view',
    'Unnamed: 22': 'lando_calrissian_view',
    'Unnamed: 23': 'boba_fett_view',
    'Unnamed: 24': 'c3p0_view',
    'Unnamed: 25': 'r2d2_view',
    'Unnamed: 26': 'jar_jar_binks_view',
    'Unnamed: 27': 'padme_amidala_view',
    'Unnamed: 28': 'yoda_view',
    'Which character shot first?': 'shot_first',
    'Are you familiar with the Expanded Universe?': 'familiar_expanded_universe',
    'Do you consider yourself to be a fan of the Expanded Universe?æ': 'expanded_universe_fan',
    'Do you consider yourself to be a fan of the Star Trek franchise?': 'star_trek_fan',
    'Gender': 'gender',
    'Age': 'age',
    'Household Income': 'income',
    'Education': 'education',
    'Location (Census Region)': 'region'
}

df.rename(columns=new_column_names, inplace=True)
df = df.drop(0)

In [107]:
# Filter the dataset to respondents that have seen at least one film.
df = df.query('seen_star_wars == "Yes"')
df = df.dropna(subset=['seen_film_1', 'seen_film_2', 'seen_film_3', 'seen_film_4', 'seen_film_5', 'seen_film_6'], how='all')
# Drop rows where 'age' column has NaN values
df = df.dropna(subset=['age'])
# Drop rows where 'education' column has NaN values
df = df.dropna(subset=['education'])

In [108]:
# Create a new column that converts the age ranges to a single number. Drop the age range categorical column.
# df['age'].unique()
age_numerical = {
    '18-29': 0,
    '30-44': 1,
    '45-60': 2,
    '> 60' : 3
}
df['age'] = df['age'].map(age_numerical)

In [109]:
# Create a new column that converts the education groupings to a single number. Drop the school categorical column
# df['education'].unique()
education_numerical = {
    'Less than high school degree': 0,
    'High school degree': 1,
    'Some college or Associate degree': 2,
    'Bachelor degree' : 3,
    'Graduate degree' : 4

}
df['education'] = df['education'].map(education_numerical)

In [110]:
# Create a new column that converts the income ranges to a single number. Drop the income range categorical column.
# filtered_df = df[df['income'].isna()]
# df['income'].unique()
# # filtered_df
income_numerical = {
    np.nan : 0,
    '$0 - $24,999': 1,
    '$25,000 - $49,999': 2,
    '$50,000 - $99,999' : 3,
    '$100,000 - $149,999' : 4,
    '$150,000+' : 5

}
df['income'] = df['income'].map(income_numerical)

In [113]:
# Create your target (also known as “y” or “label”) column based on the new income range column.
df['income50k+'] = (df['income'] >= 3).astype(int)

array(['Very favorably', 'Unfamiliar (N/A)', 'Somewhat favorably',
       'Very unfavorably', 'Neither favorably nor unfavorably (neutral)',
       nan, 'Somewhat unfavorably'], dtype=object)

In [112]:
# One-hot encode all remaining categorical columns.
df['seen_star_wars'] = np.where(df['seen_star_wars'] == 'Yes', 1, 0)
df['star_wars_fan'] = np.where(df['star_wars_fan'] == 'Yes', 1, 0)

df['seen_film_1'] = np.where(df['seen_film_1'] == 'Star Wars: Episode I  The Phantom Menace', 1, 0)
df['seen_film_2'] = np.where(df['seen_film_2'] == 'Star Wars: Episode II  Attack of the Clones', 1, 0)
df['seen_film_3'] = np.where(df['seen_film_3'] == 'Star Wars: Episode III  Revenge of the Sith', 1, 0)
df['seen_film_4'] = np.where(df['seen_film_4'] == 'Star Wars: Episode IV  A New Hope', 1, 0)
df['seen_film_5'] = np.where(df['seen_film_5'] == 'Star Wars: Episode V The Empire Strikes Back', 1, 0)
df['seen_film_6'] = np.where(df['seen_film_6'] == 'Star Wars: Episode VI Return of the Jedi', 1, 0)

df['seen_film_6'] = np.where(df['seen_film_6'] == '', 1, 0)

character_view_columns = ['han_solo_view', 'luke_skywalker_view', 'princess_leia_view', 'anakin_skywalker_view', 
                          'obi_wan_kenobi_view', 'emperor_palpatine_view', 'darth_vader_view',	
                          'lando_calrissian_view', 'boba_fett_view',	
                          'c3p0_view', 'r2d2_view', 'jar_jar_binks_view',	
                          'padme_amidala_view', 'yoda_view']

character_view_numerical = {'Very favorably': , 
                            'Unfamiliar (N/A)':,                           
                            'Somewhat favorably': ,
                            'Very unfavorably': ,
                            'Neither favorably nor unfavorably (neutral)':
       
       np.nan: , 'Somewhat unfavorably':

    
}

for character in character_view_columns:
    df[character] = df[character].map(character_view_numerical)






Unnamed: 0,id,seen_star_wars,star_wars_fan,seen_film_1,seen_film_2,seen_film_3,seen_film_4,seen_film_5,seen_film_6,film_1_preference,film_2_preference,film_3_preference,film_4_preference,film_5_preference,film_6_preference,han_solo_view,luke_skywalker_view,princess_leia_view,anakin_skywalker_view,obi_wan_kenobi_view,emperor_palpatine_view,darth_vader_view,lando_calrissian_view,boba_fett_view,c3p0_view,r2d2_view,jar_jar_binks_view,padme_amidala_view,yoda_view,shot_first,familiar_expanded_universe,expanded_universe_fan,star_trek_fan,gender,age,income,education,region,income50k+
1,3.292880e+09,1,1,1,1,1,1,1,1,3,2,1,4,5,6,Very favorably,Very favorably,Very favorably,Very favorably,Very favorably,Very favorably,Very favorably,Unfamiliar (N/A),Unfamiliar (N/A),Very favorably,Very favorably,Very favorably,Very favorably,Very favorably,I don't understand this question,Yes,No,No,Male,0,0,1,South Atlantic,0
3,3.292765e+09,1,0,1,1,1,0,0,0,1,2,3,4,5,6,Somewhat favorably,Somewhat favorably,Somewhat favorably,Somewhat favorably,Somewhat favorably,Unfamiliar (N/A),Unfamiliar (N/A),Unfamiliar (N/A),Unfamiliar (N/A),Unfamiliar (N/A),Unfamiliar (N/A),Unfamiliar (N/A),Unfamiliar (N/A),Unfamiliar (N/A),I don't understand this question,No,,No,Male,0,1,1,West North Central,0
4,3.292763e+09,1,1,1,1,1,1,1,1,5,6,1,2,4,3,Very favorably,Very favorably,Very favorably,Very favorably,Very favorably,Somewhat favorably,Very favorably,Somewhat favorably,Somewhat unfavorably,Very favorably,Very favorably,Very favorably,Very favorably,Very favorably,I don't understand this question,No,,Yes,Male,0,4,2,West North Central,1
5,3.292731e+09,1,1,1,1,1,1,1,1,5,4,6,2,1,3,Very favorably,Somewhat favorably,Somewhat favorably,Somewhat unfavorably,Very favorably,Very unfavorably,Somewhat favorably,Neither favorably nor unfavorably (neutral),Very favorably,Somewhat favorably,Somewhat favorably,Very unfavorably,Somewhat favorably,Somewhat favorably,Greedo,Yes,No,No,Male,0,4,2,West North Central,1
6,3.292719e+09,1,1,1,1,1,1,1,1,1,4,3,6,5,2,Very favorably,Very favorably,Very favorably,Very favorably,Very favorably,Neither favorably nor unfavorably (neutral),Very favorably,Neither favorably nor unfavorably (neutral),Somewhat favorably,Somewhat favorably,Somewhat favorably,Somewhat favorably,Neither favorably nor unfavorably (neutral),Very favorably,Han,Yes,No,Yes,Male,0,2,3,Middle Atlantic,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1181,3.288390e+09,1,1,1,1,1,1,1,1,3,4,5,2,1,6,Very favorably,Very favorably,Very favorably,Somewhat favorably,Very favorably,Somewhat favorably,Very favorably,Somewhat favorably,Somewhat favorably,Very favorably,Very favorably,Somewhat favorably,Somewhat favorably,Very favorably,Han,No,,No,Female,2,1,2,Pacific,0
1182,3.288389e+09,1,1,1,1,1,1,1,1,5,4,6,3,2,1,Very favorably,Somewhat favorably,Somewhat favorably,Somewhat favorably,Very favorably,Somewhat favorably,Somewhat favorably,Somewhat favorably,Somewhat favorably,Very favorably,Very favorably,Somewhat favorably,Somewhat favorably,Very favorably,Han,No,,Yes,Female,0,1,2,East North Central,0
1183,3.288379e+09,1,1,1,1,1,1,1,1,4,5,6,2,3,1,Very favorably,Somewhat favorably,Very favorably,Somewhat unfavorably,Very favorably,Neither favorably nor unfavorably (neutral),Very unfavorably,Somewhat favorably,Unfamiliar (N/A),Somewhat favorably,Very favorably,Somewhat unfavorably,Somewhat unfavorably,Very favorably,I don't understand this question,No,,Yes,Female,1,3,3,Mountain,1
1185,3.288373e+09,1,1,1,1,1,1,1,1,4,3,6,5,2,1,Very favorably,Neither favorably nor unfavorably (neutral),Very favorably,Very favorably,Very favorably,Neither favorably nor unfavorably (neutral),Very favorably,Somewhat favorably,Very favorably,Somewhat favorably,Somewhat favorably,Very favorably,Somewhat favorably,Very favorably,Han,No,,Yes,Female,2,4,2,East North Central,1
