# 7 Testing the hypothesis for wheelchair group

# This script contains the following:
### 1. Import data and libraries
### 2. Data wrangling
### 3. Data cleaning
### 4. Questioning
### 5. Dealing with possible bias

## 1. Import data and libraries

In [1]:
#Importing libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import os
import sklearn
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
%matplotlib inline

In [3]:
#Importing Data
path = r'/Users/marialuciaguarita/Phyton Careerfoundry Project 2'
Data_s = pd.read_csv(os.path.join(path,'Data_s_cleaned'), index_col = False)

In [4]:
Data_s.shape

(9905, 38)

# 2. Data wrangling

In [5]:
Data_s.columns

Index(['Unnamed: 0.3', 'Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0', 'id2',
       'region', 'area', 'age', 'adapt_bath', 'adapt_room', 'handrails',
       'property', 'salary', 'retirement', 'work', 'wheelchair', 'device',
       'device_type', 'd_get_up_chair', 'd_get_up_bed', 'dining_out',
       'concert', 'museum', 'leisure_trip', 'volunteer_work', 'diffic_to_eat',
       'have_TV', 'have_mobile', 'hours_of_help', 'n_children',
       'device_for_the_test', 'bedridden', 'diffic_run', 'diffic_walk',
       'diffic_walk_oneblock', 'diffic_climbing', 'time_walk_day',
       'household_income'],
      dtype='object')

In [6]:
Data_s = Data_s[['id2', 'region', 'area','age', 'adapt_bath', 'adapt_room', 'handrails', 'property', 'salary','retirement', 'work', 'wheelchair', 'device', 'device_type','d_get_up_chair', 'd_get_up_bed', 'dining_out', 'concert', 'museum','leisure_trip', 'volunteer_work', 'diffic_to_eat', 'have_TV','have_mobile', 'hours_of_help', 'n_children', 'device_for_the_test','bedridden', 'diffic_run', 'diffic_walk', 'diffic_walk_oneblock','diffic_climbing', 'time_walk_day', 'household_income']]
Data_s.columns 

Index(['id2', 'region', 'area', 'age', 'adapt_bath', 'adapt_room', 'handrails',
       'property', 'salary', 'retirement', 'work', 'wheelchair', 'device',
       'device_type', 'd_get_up_chair', 'd_get_up_bed', 'dining_out',
       'concert', 'museum', 'leisure_trip', 'volunteer_work', 'diffic_to_eat',
       'have_TV', 'have_mobile', 'hours_of_help', 'n_children',
       'device_for_the_test', 'bedridden', 'diffic_run', 'diffic_walk',
       'diffic_walk_oneblock', 'diffic_climbing', 'time_walk_day',
       'household_income'],
      dtype='object')

### Selecting people with walking impariment bedridden


Does the responded have difficulty walking 100 meters? = (0) No,(1) Yes and (9) Didn’t know/didn’t answer

Considering: check whether the respondent is bedridden = (1) Yes, for less than 3 months; (2) Yes, for 3 months or more
; (3) No

In [7]:
Data_s_disabled = Data_s.copy()
Data_s_disabled.drop(Data_s_disabled.index[Data_s_disabled['diffic_walk_oneblock']==0],inplace=True)
Data_s_disabled.drop(Data_s_disabled.index[Data_s_disabled['diffic_walk_oneblock']==9],inplace=True)

In [8]:
Data_s_disabled.drop(Data_s_disabled.index[Data_s_disabled['bedridden']==3],inplace=True)
Data_s_disabled.drop(Data_s_disabled.index[Data_s_disabled['bedridden']==3],inplace=True)

In [9]:
Data_s_disabled.shape

(157, 34)

### Selecting people with walking impariment, bedrriden, without a wheelchair

Does the respondent use a wheelchair ? = (1) Yes, for less than 3 months or (2) Yes, for 3 months or more and (3) No

In [10]:
Data_s_disabled_wochair = Data_s_disabled.copy()
Data_s_disabled_wochair.drop(Data_s_disabled_wochair.index[Data_s_disabled_wochair['wheelchair']==1],inplace=True)

In [11]:
Data_s_disabled_wochair = Data_s_disabled.copy()
Data_s_disabled_wochair.drop(Data_s_disabled_wochair.index[Data_s_disabled_wochair['wheelchair']==2],inplace=True)

In [12]:
Data_s_disabled_wochair.shape

(94, 34)

### Selecting people with a wheelchair with walking impariment 

In [13]:
Data_s_disabled_wchair = Data_s_disabled.copy()
Data_s_disabled_wchair.drop(Data_s_disabled_wchair.index[Data_s_disabled_wchair['wheelchair']==3],inplace=True)

In [14]:
Data_s_disabled_wchair.shape

(80, 34)

# 4. Questioning

Does people with a wheelchair and walking impariment spend more often going to the concert, dining out and to the museumm than people with walking impariment and no wheelchair?

1- CONCERT
(1) At least once a month 
(2) At least twice a year 
(3) Less than twice a year 
(4) Never 
(9) Didn’t know/didn’t answer 

In [15]:
#Excluding Didn't know and didn't answer from data and do not go to the concert
Data_s_disabled_wochairc = Data_s_disabled_wochair.copy()
Data_s_disabled_wochairc.drop(Data_s_disabled_wochairc.index[Data_s_disabled_wochairc['concert']==9],inplace=True)
Data_s_disabled_wchairc = Data_s_disabled_wchair.copy()
Data_s_disabled_wchairc.drop(Data_s_disabled_wchairc.index[Data_s_disabled_wchairc['concert']==9],inplace=True)

In [16]:
Data_s_disabled_wochairc.drop(Data_s_disabled_wochairc.index[Data_s_disabled_wochairc['concert']==4],inplace=True)
Data_s_disabled_wchairc.drop(Data_s_disabled_wchairc.index[Data_s_disabled_wchairc['concert']==4],inplace=True)

In [17]:
Data_s_disabled_wochairc['concert'].describe()

count    4.0
mean     1.0
std      0.0
min      1.0
25%      1.0
50%      1.0
75%      1.0
max      1.0
Name: concert, dtype: float64

In [18]:
Data_s_disabled_wchairc['concert'].describe()

count    3.000000
mean     1.666667
std      1.154701
min      1.000000
25%      1.000000
50%      1.000000
75%      2.000000
max      3.000000
Name: concert, dtype: float64

2- DINING OUT
(1) At least once a month 
(2) At least twice a year 
(3) Less than twice a year 
(4) Never 
(9) Didn’t know/didn’t answer 

In [19]:
#Excluding Didn't know and didn't answer from data and do not go to dine out
Data_s_disabled_wochaird = Data_s_disabled_wochair.copy()
Data_s_disabled_wochaird.drop(Data_s_disabled_wochaird.index[Data_s_disabled_wochaird['dining_out']==9],inplace=True)
Data_s_disabled_wchaird = Data_s_disabled_wchair.copy()
Data_s_disabled_wchaird.drop(Data_s_disabled_wchaird.index[Data_s_disabled_wchaird['dining_out']==9],inplace=True)

In [20]:
Data_s_disabled_wochaird.drop(Data_s_disabled_wochaird.index[Data_s_disabled_wochaird['dining_out']==4],inplace=True)
Data_s_disabled_wchaird.drop(Data_s_disabled_wchaird.index[Data_s_disabled_wchaird['dining_out']==4],inplace=True)

In [21]:
Data_s_disabled_wochaird['dining_out'].describe()

count    7.000000
mean     1.428571
std      0.534522
min      1.000000
25%      1.000000
50%      1.000000
75%      2.000000
max      2.000000
Name: dining_out, dtype: float64

In [22]:
Data_s_disabled_wchaird['dining_out'].describe()

count    7.000000
mean     1.857143
std      0.899735
min      1.000000
25%      1.000000
50%      2.000000
75%      2.500000
max      3.000000
Name: dining_out, dtype: float64

3- MUSEUM
(1) At least once a month 
(2) At least twice a year 
(3) Less than twice a year 
(4) Never 
(9) Didn’t know/didn’t answer 

In [23]:
#Excluding Didn't know and didn't answer from data and do not go to museum
Data_s_disabled_wochairm = Data_s_disabled_wochair.copy()
Data_s_disabled_wochairm.drop(Data_s_disabled_wochairm.index[Data_s_disabled_wochairm['museum']==9],inplace=True)
Data_s_disabled_wchairm = Data_s_disabled_wchair.copy()
Data_s_disabled_wchairm.drop(Data_s_disabled_wchairm.index[Data_s_disabled_wchairm['museum']==9],inplace=True)

In [24]:
Data_s_disabled_wochairm.drop(Data_s_disabled_wochairm.index[Data_s_disabled_wochairm['museum']==4],inplace=True)
Data_s_disabled_wchairm.drop(Data_s_disabled_wchairm.index[Data_s_disabled_wchairm['museum']==4],inplace=True)

In [25]:
Data_s_disabled_wochairm['museum'].describe()

count    4.0
mean     1.5
std      1.0
min      1.0
25%      1.0
50%      1.0
75%      1.5
max      3.0
Name: museum, dtype: float64

In [26]:
Data_s_disabled_wchairm['museum'].describe()

count    4.000000
mean     2.000000
std      1.154701
min      1.000000
25%      1.000000
50%      2.000000
75%      3.000000
max      3.000000
Name: museum, dtype: float64

### Comparing hours of volunteer work between who has or not have a wheelchair

|__|__|__| hours per week 
(0) Did not perform that kind of work or performed it less than 1 hour per week 
(888) Does not apply 
(999) Didn’t know/didn’t answer 

In [27]:
#Excluding Didn't know / didn't answer, does not reply or did not perform 
Data_s_disabled_wochairv = Data_s_disabled_wochair.copy()
Data_s_disabled_wochairv.drop(Data_s_disabled_wochairv.index[Data_s_disabled_wochairv['volunteer_work']==0],inplace=True)
Data_s_disabled_wochairv.drop(Data_s_disabled_wochairv.index[Data_s_disabled_wochairv['volunteer_work']==888],inplace=True)
Data_s_disabled_wochairv.drop(Data_s_disabled_wochairv.index[Data_s_disabled_wochairv['volunteer_work']==999],inplace=True)


In [28]:
Data_s_disabled_wochairv['volunteer_work'].describe()

count    1.0
mean     5.0
std      NaN
min      5.0
25%      5.0
50%      5.0
75%      5.0
max      5.0
Name: volunteer_work, dtype: float64

In [29]:
#Excluding Didn't know / didn't answer, does not reply or did not perform 
Data_s_disabled_wchairv = Data_s_disabled_wchair.copy()
Data_s_disabled_wchairv.drop(Data_s_disabled_wchairv.index[Data_s_disabled_wchairv['volunteer_work']==0],inplace=True)
Data_s_disabled_wchairv.drop(Data_s_disabled_wchairv.index[Data_s_disabled_wchairv['volunteer_work']==888],inplace=True)
Data_s_disabled_wchairv.drop(Data_s_disabled_wchairv.index[Data_s_disabled_wchairv['volunteer_work']==999],inplace=True)


In [30]:
Data_s_disabled_wchairv['volunteer_work'].describe()

count    1.0
mean     5.0
std      NaN
min      5.0
25%      5.0
50%      5.0
75%      5.0
max      5.0
Name: volunteer_work, dtype: float64

# 5. Dealing with possible bias

Testing functionality between groups

(0) No 
(1) Yes 
(9) Didn’t know/didn’t answer 

In [31]:
#Excluding Didn't know / didn't answer, does not apply from difficulties in wheelchair group
Data_s_disabled_wchair.drop(Data_s_disabled_wchair.index[Data_s_disabled_wchair['diffic_to_eat']==9],inplace=True)
Data_s_disabled_wchair.drop(Data_s_disabled_wchair.index[Data_s_disabled_wchair['diffic_to_eat']==0],inplace=True)
Data_s_disabled_wchair.drop(Data_s_disabled_wchair.index[Data_s_disabled_wchair['d_get_up_bed']==9],inplace=True)
Data_s_disabled_wchair.drop(Data_s_disabled_wchair.index[Data_s_disabled_wchair['d_get_up_bed']==0],inplace=True)
Data_s_disabled_wchair.drop(Data_s_disabled_wchair.index[Data_s_disabled_wchair['d_get_up_chair']==9],inplace=True)
Data_s_disabled_wchair.drop(Data_s_disabled_wchair.index[Data_s_disabled_wchair['d_get_up_chair']==0],inplace=True)

In [32]:
Data_s_disabled_wchair ['functionality'] = Data_s_disabled_wchair ['diffic_to_eat'] + Data_s_disabled_wchair['d_get_up_chair'] + Data_s_disabled_wchair['d_get_up_bed']                                                                                                                                        
                                                                                                                                                                                                                                                                           

In [33]:
Data_s_disabled_wchair ['functionality'].describe()

count    68.000000
mean      6.602941
std       4.911575
min       3.000000
25%       3.000000
50%       3.000000
75%      10.000000
max      17.000000
Name: functionality, dtype: float64

In [34]:
#Excluding Didn't know / didn't answer, does not apply from difficulties in wheelchair group
Data_s_disabled_wochair.drop(Data_s_disabled_wochair.index[Data_s_disabled_wochair['diffic_to_eat']==9],inplace=True)
Data_s_disabled_wochair.drop(Data_s_disabled_wochair.index[Data_s_disabled_wochair['diffic_to_eat']==0],inplace=True)
Data_s_disabled_wochair.drop(Data_s_disabled_wochair.index[Data_s_disabled_wochair['d_get_up_bed']==9],inplace=True)
Data_s_disabled_wochair.drop(Data_s_disabled_wochair.index[Data_s_disabled_wochair['d_get_up_bed']==0],inplace=True)
Data_s_disabled_wochair.drop(Data_s_disabled_wochair.index[Data_s_disabled_wochair['d_get_up_chair']==9],inplace=True)
Data_s_disabled_wochair.drop(Data_s_disabled_wochair.index[Data_s_disabled_wochair['d_get_up_chair']==0],inplace=True)

In [35]:
Data_s_disabled_wochair ['functionality'] = Data_s_disabled_wochair ['diffic_to_eat'] + Data_s_disabled_wochair['d_get_up_chair'] + Data_s_disabled_wochair['d_get_up_bed']                                                                                                                                        

In [36]:
Data_s_disabled_wochair ['functionality'].describe()

count    80.000000
mean      7.987500
std       5.808099
min       3.000000
25%       3.000000
50%       3.000000
75%      10.000000
max      17.000000
Name: functionality, dtype: float64