# 8 Testing the hypothesis for device group

# This script contains the following:
### 1. Import data and libraries
### 2. Data wrangling
### 3. Data cleaning
### 4. Questioning

## 1. Import data and libraries

In [1]:
#Importing libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import os
import sklearn
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
%matplotlib inline

In [3]:
#Importing Data
path = r'/Users/marialuciaguarita/Phyton Careerfoundry Project 2'
Data_s = pd.read_csv(os.path.join(path,'Data_s_cleaned'), index_col = False)

In [4]:
Data_s.shape

(9905, 38)

In [5]:
Data_s.columns

Index(['Unnamed: 0.3', 'Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0', 'id2',
       'region', 'area', 'age', 'adapt_bath', 'adapt_room', 'handrails',
       'property', 'salary', 'retirement', 'work', 'wheelchair', 'device',
       'device_type', 'd_get_up_chair', 'd_get_up_bed', 'dining_out',
       'concert', 'museum', 'leisure_trip', 'volunteer_work', 'diffic_to_eat',
       'have_TV', 'have_mobile', 'hours_of_help', 'n_children',
       'device_for_the_test', 'bedridden', 'diffic_run', 'diffic_walk',
       'diffic_walk_oneblock', 'diffic_climbing', 'time_walk_day',
       'household_income'],
      dtype='object')

In [6]:
Data_s = Data_s[['id2', 'region', 'area','age', 'adapt_bath', 'adapt_room', 'handrails', 'property', 'salary','retirement', 'work', 'wheelchair', 'device', 'device_type','d_get_up_chair', 'd_get_up_bed', 'dining_out', 'concert', 'museum','leisure_trip', 'volunteer_work', 'diffic_to_eat', 'have_TV','have_mobile', 'hours_of_help', 'n_children', 'device_for_the_test','bedridden', 'diffic_run', 'diffic_walk', 'diffic_walk_oneblock','diffic_climbing', 'time_walk_day', 'household_income']]
Data_s.columns 

Index(['id2', 'region', 'area', 'age', 'adapt_bath', 'adapt_room', 'handrails',
       'property', 'salary', 'retirement', 'work', 'wheelchair', 'device',
       'device_type', 'd_get_up_chair', 'd_get_up_bed', 'dining_out',
       'concert', 'museum', 'leisure_trip', 'volunteer_work', 'diffic_to_eat',
       'have_TV', 'have_mobile', 'hours_of_help', 'n_children',
       'device_for_the_test', 'bedridden', 'diffic_run', 'diffic_walk',
       'diffic_walk_oneblock', 'diffic_climbing', 'time_walk_day',
       'household_income'],
      dtype='object')

In [7]:
Data_s['age'].describe()

count    9905.000000
mean       66.647350
std         9.890062
min        50.000000
25%        58.000000
50%        65.000000
75%        74.000000
max       109.000000
Name: age, dtype: float64

In [8]:
Data_s['household_income'].describe()

count     9905.000000
mean      2787.594427
std       2788.781923
min          0.000000
25%       1400.000000
50%       2080.000000
75%       3000.000000
max      80000.000000
Name: household_income, dtype: float64

### Selecting people with walking impariment 

Does the responded have difficulty walking 100 meters? = (0) No,(1) Yes and (9) Didn’t know/didn’t answer

In [9]:
Data_s_disabled = Data_s.copy()
Data_s_disabled.drop(Data_s_disabled.index[Data_s_disabled['diffic_walk_oneblock']==0],inplace=True)

In [10]:
Data_s_disabled.shape

(2592, 34)

In [11]:
Data_s_disabled.drop(Data_s_disabled.index[Data_s_disabled['diffic_walk_oneblock']==9],inplace=True)

In [12]:
Data_s_disabled.shape

(2514, 34)

### Selecting people with walking impariment without a device

We considered that people with difficult do walk 100 meters and is not bedridden would need a walker or a pair of crutches

Considering: check whether the respondent is bedridden = (1) Yes, for less than 3 months; (2) Yes, for 3 months or more
; (3) No

In [13]:
Data_s_disabled_nobedridden = Data_s_disabled.copy()
Data_s_disabled_nobedridden.drop(Data_s_disabled_nobedridden.index[Data_s_disabled_nobedridden['bedridden']==2],inplace=True)
Data_s_disabled_nobedridden.shape

(2397, 34)

In [14]:
Data_s_disabled_nobedridden.drop(Data_s_disabled_nobedridden.index[Data_s_disabled_nobedridden['bedridden']==1],inplace=True)
Data_s_disabled_nobedridden.shape

(2357, 34)

In [15]:
Data_s_disabled_nobedridden_nodevice = Data_s_disabled_nobedridden.copy()
Data_s_disabled_nobedridden_nodevice.drop(Data_s_disabled_nobedridden_nodevice.index[Data_s_disabled_nobedridden_nodevice['device']==1],inplace=True)
Data_s_disabled_nobedridden_nodevice.drop(Data_s_disabled_nobedridden_nodevice.index[Data_s_disabled_nobedridden_nodevice['device']==9],inplace=True)
Data_s_disabled_nobedridden_nodevice.shape

(1925, 34)

### Selecting people with walking impariment with a device 

In [16]:
Data_s_disabled_nobedridden_device = Data_s_disabled_nobedridden.copy()
Data_s_disabled_nobedridden_device.drop(Data_s_disabled_nobedridden_device.index[Data_s_disabled_nobedridden_device['device']==0],inplace=True)
Data_s_disabled_nobedridden_device.drop(Data_s_disabled_nobedridden_device.index[Data_s_disabled_nobedridden_device['device']==9],inplace=True)
Data_s_disabled_nobedridden_device.shape

(366, 34)

# 4. Questioning

Does people with a DEVICE and walking impariment spend more often going to the concert, dining out and to the museumm than people with walking impariment and no DEVICE?

1- CONCERT
(1) At least once a month 
(2) At least twice a year 
(3) Less than twice a year 
(4) Never 
(9) Didn’t know/didn’t answer 

In [17]:
#Excluding Didn't know and didn't answer from data and do not go to the concert
Data_s_disabled_nobedridden_nodevicec = Data_s_disabled_nobedridden_nodevice.copy()
Data_s_disabled_nobedridden_nodevicec.drop(Data_s_disabled_nobedridden_nodevicec.index[Data_s_disabled_nobedridden_nodevicec['concert']==9],inplace=True)
Data_s_disabled_nobedridden_devicec = Data_s_disabled_nobedridden_device.copy()
Data_s_disabled_nobedridden_devicec.drop(Data_s_disabled_nobedridden_devicec.index[Data_s_disabled_nobedridden_devicec['concert']==9],inplace=True)

In [18]:
Data_s_disabled_nobedridden_nodevicec.drop(Data_s_disabled_nobedridden_nodevicec.index[Data_s_disabled_nobedridden_nodevicec['concert']==4],inplace=True)
Data_s_disabled_nobedridden_devicec.drop(Data_s_disabled_nobedridden_devicec.index[Data_s_disabled_nobedridden_devicec['concert']==4],inplace=True)

In [19]:
Data_s_disabled_nobedridden_nodevicec['concert'].describe()

count    100.000000
mean       2.030000
std        0.936952
min        1.000000
25%        1.000000
50%        2.000000
75%        3.000000
max        3.000000
Name: concert, dtype: float64

In [20]:
Data_s_disabled_nobedridden_devicec['concert'].describe()

count    16.000000
mean      1.812500
std       0.981071
min       1.000000
25%       1.000000
50%       1.000000
75%       3.000000
max       3.000000
Name: concert, dtype: float64

2- DINING OUT
(1) At least once a month 
(2) At least twice a year 
(3) Less than twice a year 
(4) Never 
(9) Didn’t know/didn’t answer 

In [21]:
#Excluding Didn't know and didn't answer from data and do not go to dine out
Data_s_disabled_nobedridden_nodeviced = Data_s_disabled_nobedridden_nodevice.copy()
Data_s_disabled_nobedridden_nodeviced.drop(Data_s_disabled_nobedridden_nodeviced.index[Data_s_disabled_nobedridden_nodeviced['dining_out']==9],inplace=True)
Data_s_disabled_nobedridden_deviced = Data_s_disabled_nobedridden_device.copy()
Data_s_disabled_nobedridden_deviced.drop(Data_s_disabled_nobedridden_deviced.index[Data_s_disabled_nobedridden_deviced['dining_out']==9],inplace=True)

In [22]:
Data_s_disabled_nobedridden_nodeviced.drop(Data_s_disabled_nobedridden_nodeviced.index[Data_s_disabled_nobedridden_nodeviced['dining_out']==4],inplace=True)
Data_s_disabled_nobedridden_deviced.drop(Data_s_disabled_nobedridden_deviced.index[Data_s_disabled_nobedridden_deviced['dining_out']==4],inplace=True)


In [23]:
Data_s_disabled_nobedridden_nodeviced['dining_out'].describe()

count    438.000000
mean       1.956621
std        0.861290
min        1.000000
25%        1.000000
50%        2.000000
75%        3.000000
max        3.000000
Name: dining_out, dtype: float64

In [24]:
Data_s_disabled_nobedridden_deviced['dining_out'].describe()

count    66.000000
mean      1.954545
std       0.867034
min       1.000000
25%       1.000000
50%       2.000000
75%       3.000000
max       3.000000
Name: dining_out, dtype: float64

3- MUSEUM
(1) At least once a month 
(2) At least twice a year 
(3) Less than twice a year 
(4) Never 
(9) Didn’t know/didn’t answer 

In [25]:
#Excluding Didn't know and didn't answer from data and do not go to dine out
Data_s_disabled_nobedridden_nodevicem = Data_s_disabled_nobedridden_nodevice.copy()
Data_s_disabled_nobedridden_nodevicem.drop(Data_s_disabled_nobedridden_nodevicem.index[Data_s_disabled_nobedridden_nodevicem['museum']==9],inplace=True)
Data_s_disabled_nobedridden_devicem = Data_s_disabled_nobedridden_device.copy()
Data_s_disabled_nobedridden_devicem.drop(Data_s_disabled_nobedridden_devicem.index[Data_s_disabled_nobedridden_devicem['museum']==9],inplace=True)

In [26]:
Data_s_disabled_nobedridden_nodevicem.drop(Data_s_disabled_nobedridden_nodevicem.index[Data_s_disabled_nobedridden_nodevicem['museum']==4],inplace=True)
Data_s_disabled_nobedridden_devicem.drop(Data_s_disabled_nobedridden_devicem.index[Data_s_disabled_nobedridden_devicem['museum']==4],inplace=True)

In [27]:
Data_s_disabled_nobedridden_nodevicem['museum'].describe()

count    69.000000
mean      1.898551
std       0.957093
min       1.000000
25%       1.000000
50%       1.000000
75%       3.000000
max       3.000000
Name: museum, dtype: float64

In [28]:
Data_s_disabled_nobedridden_devicem['museum'].describe()

count    14.000000
mean      1.714286
std       0.994490
min       1.000000
25%       1.000000
50%       1.000000
75%       3.000000
max       3.000000
Name: museum, dtype: float64

### Comparing hours of volunteer work between who has or not have a device

|__|__|__| hours per week 
(0) Did not perform that kind of work or performed it less than 1 hour per week 
(888) Does not apply 
(999) Didn’t know/didn’t answer 

In [29]:
#Excluding Didn't know / didn't answer, does not reply or did not perform 
Data_s_disabled_nobedridden_nodevice.drop(Data_s_disabled_nobedridden_nodevice.index[Data_s_disabled_nobedridden_nodevice['volunteer_work']==0],inplace=True)
Data_s_disabled_nobedridden_nodevice.drop(Data_s_disabled_nobedridden_nodevice.index[Data_s_disabled_nobedridden_nodevice['volunteer_work']==888],inplace=True)
Data_s_disabled_nobedridden_nodevice.drop(Data_s_disabled_nobedridden_nodevice.index[Data_s_disabled_nobedridden_nodevice['volunteer_work']==999],inplace=True)


In [30]:
Data_s_disabled_nobedridden_nodevice['volunteer_work'].describe()

count    68.000000
mean      5.102941
std       4.401966
min       1.000000
25%       2.000000
50%       3.000000
75%       6.250000
max      20.000000
Name: volunteer_work, dtype: float64

In [31]:
#Excluding Didn't know / didn't answer, does not reply or did not perform 
Data_s_disabled_nobedridden_device.drop(Data_s_disabled_nobedridden_device.index[Data_s_disabled_nobedridden_device['volunteer_work']==0],inplace=True)
Data_s_disabled_nobedridden_device.drop(Data_s_disabled_nobedridden_device.index[Data_s_disabled_nobedridden_device['volunteer_work']==888],inplace=True)
Data_s_disabled_nobedridden_device.drop(Data_s_disabled_nobedridden_device.index[Data_s_disabled_nobedridden_device['volunteer_work']==999],inplace=True)


In [32]:
Data_s_disabled_nobedridden_device['volunteer_work'].describe()

count     5.000000
mean     19.400000
std      20.683327
min       4.000000
25%       4.000000
50%       5.000000
75%      40.000000
max      44.000000
Name: volunteer_work, dtype: float64

# 5. Dealing with possible bias

Testing functionality between groups

(0) No 
(1) Yes 
(9) Didn’t know/didn’t answer 

In [33]:
#Excluding Didn't know / didn't answer, does not apply from difficulties in wheelchair group
Data_s_disabled_nobedridden_nodevice.drop(Data_s_disabled_nobedridden_nodevice.index[Data_s_disabled_nobedridden_nodevice['diffic_to_eat']==9],inplace=True)
Data_s_disabled_nobedridden_nodevice.drop(Data_s_disabled_nobedridden_nodevice.index[Data_s_disabled_nobedridden_nodevice['diffic_to_eat']==8],inplace=True)
Data_s_disabled_nobedridden_nodevice.drop(Data_s_disabled_nobedridden_nodevice.index[Data_s_disabled_nobedridden_nodevice['d_get_up_bed']==9],inplace=True)
Data_s_disabled_nobedridden_nodevice.drop(Data_s_disabled_nobedridden_nodevice.index[Data_s_disabled_nobedridden_nodevice['d_get_up_bed']==8],inplace=True)
Data_s_disabled_nobedridden_nodevice.drop(Data_s_disabled_nobedridden_nodevice.index[Data_s_disabled_nobedridden_nodevice['d_get_up_chair']==9],inplace=True)
Data_s_disabled_nobedridden_nodevice.drop(Data_s_disabled_nobedridden_nodevice.index[Data_s_disabled_nobedridden_nodevice['d_get_up_chair']==8],inplace=True)

In [34]:
Data_s_disabled_nobedridden_nodevice['diffic_to_eat'].describe()

count    1.0
mean     1.0
std      NaN
min      1.0
25%      1.0
50%      1.0
75%      1.0
max      1.0
Name: diffic_to_eat, dtype: float64

In [35]:
#Excluding Didn't know / didn't answer, does not apply from difficulties in wheelchair group
Data_s_disabled_nobedridden_device.drop(Data_s_disabled_nobedridden_device.index[Data_s_disabled_nobedridden_device['diffic_to_eat']==9],inplace=True)
Data_s_disabled_nobedridden_device.drop(Data_s_disabled_nobedridden_device.index[Data_s_disabled_nobedridden_device['diffic_to_eat']==8],inplace=True)
Data_s_disabled_nobedridden_device.drop(Data_s_disabled_nobedridden_device.index[Data_s_disabled_nobedridden_device['d_get_up_bed']==9],inplace=True)
Data_s_disabled_nobedridden_device.drop(Data_s_disabled_nobedridden_device.index[Data_s_disabled_nobedridden_device['d_get_up_bed']==8],inplace=True)
Data_s_disabled_nobedridden_device.drop(Data_s_disabled_nobedridden_device.index[Data_s_disabled_nobedridden_device['d_get_up_chair']==9],inplace=True)
Data_s_disabled_nobedridden_device.drop(Data_s_disabled_nobedridden_device.index[Data_s_disabled_nobedridden_device['d_get_up_chair']==8],inplace=True)

In [36]:
Data_s_disabled_nobedridden_device['diffic_to_eat'].describe()

count    1.0
mean     0.0
std      NaN
min      0.0
25%      0.0
50%      0.0
75%      0.0
max      0.0
Name: diffic_to_eat, dtype: float64