# Load Libraries

In [91]:
from scipy.stats import ttest_ind
import pandas as pd

**Load Data**

In [92]:
df = pd.read_csv(r"C:\Users\theod\Downloads\mhealth_app_review_data.csv")

In [93]:
df.head()

Unnamed: 0,AppID,PermissionToName,AppName,Reviewer,Platform,OS,Whichpopulationsistheapptargeting,Howwouldyoubestcategorizetheappdeveloper,Whatkindsofsupportdoesthedeveloperoffer,Wasanappropriateclinicalexpertinvolvedinappdevelopmentorqualitycontrol,...,iOSNumRatings,AndroidAvgRating,AndroidNumRatings,AndroidNumDownloads,iOSONLYPricetodownloadtheappindollars,iOSONLYPricetopurchaseALLinapppurchasesindollars,iOSONLYAnnualpricetopurchaseasubscriptionindollars,ANDROIDONLYPricetodownloadtheappindollars,ANDROIDONLYPricetopurchaseALLinapppurchasesindollars,ANDROIDONLYAnnualpricetopurchaseasubscriptionindollars
0,5,n,,clinician_reviewer,iPhone,iOS,None of the above,For-profit company,E-mail,No clinical expert involved,...,366.0,,,,22.01,0.0,0.0,,,
1,5,n,,nonclinician_reviewer,iPad or iPad Mini,iOS,All of the above,For-profit company,"E-mail, Phone",No clinical expert involved,...,,,,,15.0,0.0,0.0,,,
2,13,y,Blood Pressure Companion,clinician_reviewer,Android tablet,Android,Hypertension,For-profit company,E-mail,Clinical expert involved,...,,4.0,124.0,"1,000 - 5,000",0.99,1.22,0.0,,,
3,13,y,Blood Pressure Companion,nonclinician_reviewer,Android tablet,Android,Hypertension,For-profit company,E-mail,No clinical expert involved,...,,,,,,,,0.0,1.28,0.0
4,14,y,Blood Pressure Companion,clinician_reviewer,iPhone,iOS,Hypertension,For-profit company,E-mail,No clinical expert involved,...,878.0,,,,0.99,0.0,0.0,,,


**Check Data Types**

In [166]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 274 entries, 0 to 273
Data columns (total 51 columns):
 #   Column                                                                                    Non-Null Count  Dtype  
---  ------                                                                                    --------------  -----  
 0   AppID                                                                                     274 non-null    object 
 1   PermissionToName                                                                          274 non-null    object 
 2   AppName                                                                                   100 non-null    object 
 3   Reviewer                                                                                  274 non-null    object 
 4   Platform                                                                                  274 non-null    object 
 5   OS                                                       

**Clean up Relevant Data Columns**

In [145]:
df["iOSNumRatings"].replace({"Taken off app store":None},inplace=True) # Replace such text with nan
df["iOSAvgRating"].replace({"Taken off app store":None},inplace=True) # Replace such text with nan
df["iOSNumRatings"] = df["iOSNumRatings"].astype(float) # Update column data type
df["iOSAvgRating"] = df["iOSNumRatings"].astype(float) # Update column data type

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["iOSNumRatings"].replace({"Taken off app store":None},inplace=True)


In [96]:
# rename column for easier usability
df.rename(columns={"Wasanappropriateclinicalexpertinvolvedinappdevelopmentorqualitycontrol":"ClinicanInvolved"},inplace=True) 

# Statistical Testing

**Independent t-test**

In [169]:
noClinican = df[(df["ClinicanInvolved"] == "No clinical expert involved") & (df["Reviewer"] == "clinician_reviewer")]
Clinican_involved = df[(df["ClinicanInvolved"] == "Clinical expert involved") & (df["Reviewer"] == "clinician_reviewer")]

a = noClinican['iOSNumRatings']
b = Clinican_involved["iOSNumRatings"]

print("Dataset A Mean:",a.mean())
print("Dataseet B Mean:",b.mean())
print("-------------------------------------------------")
print("Dataset A Variance:",a.var())
print("Dataseet B Variance:",b.var())

c = noClinican['AndroidNumRatings']
d = Clinican_involved["AndroidNumRatings"]
print("-------------------------------------------------")
print("Dataseet C Mean:",c.mean())
print("Dataseet D Mean:",d.mean())
print("-------------------------------------------------")
print("Dataseet C Variance:",c.var())
print("Dataseet D Variance:",d.var())

Dataset A Mean: 21418.227272727272
Dataseet B Mean: 1962.111111111111
-------------------------------------------------
Dataset A Variance: 9673600714.374458
Dataseet B Variance: 20225360.36111111
-------------------------------------------------
Dataseet C Mean: 60521.72222222222
Dataseet D Mean: 644.9090909090909
-------------------------------------------------
Dataseet C Variance: 65403734006.56536
Dataseet D Variance: 2043083.2909090905


In [140]:
# Independent T-test
# ( H_0 ): There is no association between clinical expert involvement in app development and the number of iOS ratings.
# ( H_1 ): There is an association between clinical expert involvement in app development and the number of iOS ratings

test_1 = ttest_ind(a,b,equal_var=False,nan_policy='omit',alternative='two-sided')
test_2 = ttest_ind(c,d,equal_var=False,nan_policy='omit',alternative='two-sided')

print(test_1)
print(test_2)

# Given these results, we fail to reject the null hypothesis at an alpha level of 0.05. 
# This suggests that there is insufficient evidence to conclude that clinician
# involvement significantly affects the number of iOS ratings.

TtestResult(statistic=np.float64(0.9254787342124383), pvalue=np.float64(0.36512616108395013), df=np.float64(21.21374729380766))
TtestResult(statistic=np.float64(0.9933048560936946), pvalue=np.float64(0.3344889550279603), df=np.float64(17.001737938857534))


**Estimating the Population Proportion For Clincial Expert Involvment in App Development**

In [138]:
import scipy.stats as st
import math
 
x = noClinican.copy() # number of clinical experts not involved
n = df.shape[0]/2 # divide by 2 because each app is recorded on 2 os systems

# calculate the sample porportion
p_hat = ((x.groupby("ClinicanInvolved")["AppID"].count()/2)/n)[0]

# Set the confidence level
confidence = 0.95

# calculate z-score
z = st.norm.ppf((1+confidence)/2)

# Calculate the margin of error
moe = z * math.sqrt((p_hat * (1-p_hat))/n) 

# calculate confidence interval
lower_bound = p_hat - moe
upper_bound = p_hat + moe

print("Sample Proportion:",p_hat)
print(f"Confidence Interval ({confidence*100}%): ({lower_bound}, {upper_bound})")

# The estimated proportion of apps developed without clinical professional involvement, approximately 64%, falls within a 
# 95% confidence interval ranging from 56.21% to 72.26%. This interval suggests that the sample statistic is likely reflective of 
# the broader population of health-related applications. However, further studies would be beneficial to confirm this finding across 
# a wider range of apps and contexts.


Sample Proportion: 0.6423357664233577
Confidence Interval (95.0%): (0.5620743875820725, 0.7225971452646428)


  p_hat = ((x.groupby("ClinicanInvolved")["AppID"].count()/2)/n)[0]


**Using an Anova to test whether there exist any significant differences between the average app rating based on developer category**

In [165]:
from scipy.stats import f_oneway
# ( H_0 ): There is no association between clinical expert involvement in app development and the number of Android ratings.
# ( H_1 ): There is an association between clinical expert involvement in app development and the number of Android ratings.

# Testing IOS Significance
category_means = df.groupby(["Howwouldyoubestcategorizetheappdeveloper",'Reviewer'])['iOSAvgRating'].agg(['mean']).unstack()
for_profit = df[df['Howwouldyoubestcategorizetheappdeveloper'] == "For-profit company"]["iOSAvgRating"]
government = df[df['Howwouldyoubestcategorizetheappdeveloper'] == "Government agency"]["iOSAvgRating"]
individual = df[df['Howwouldyoubestcategorizetheappdeveloper'] == "Individual"]["iOSAvgRating"]
medical = df[df['Howwouldyoubestcategorizetheappdeveloper'] == "Medical professional society"]["iOSAvgRating"]
other = df[df['Howwouldyoubestcategorizetheappdeveloper'] == "Other non-profit organization"]["iOSAvgRating"]

f_oneway(for_profit,government,individual,medical,other,nan_policy='omit')

F_onewayResult(statistic=np.float64(0.20999884770703464), pvalue=np.float64(0.9321003744419887))

In [164]:
# ( H_0 ): There is no difference in average app rating among different developer categories.
# ( H_1 ): There is a difference in average app rating among different developer categories.

# Testing for Android Significance
for_profit = df[df['Howwouldyoubestcategorizetheappdeveloper'] == "For-profit company"]["AndroidAvgRating"]
government = df[df['Howwouldyoubestcategorizetheappdeveloper'] == "Government agency"]["AndroidAvgRating"]
individual = df[df['Howwouldyoubestcategorizetheappdeveloper'] == "Individual"]["AndroidAvgRating"]
medical = df[df['Howwouldyoubestcategorizetheappdeveloper'] == "Medical professional society"]["AndroidAvgRating"]
other = df[df['Howwouldyoubestcategorizetheappdeveloper'] == "Other non-profit organization"]["AndroidAvgRating"]

f_oneway(for_profit,government,individual,medical,other,nan_policy='omit')

# Based on the ANOVA results, we fail to reject the null hypothesis for both iOS and 
# Android average ratings at a significance level of 0.05. This suggests that there is insufficient evidence to conclude that there is 
# a significant difference in average app rating across different developer categories. 
# Therefore, we must accept the null hypothesis and reject the alternative hypothesis in both cases.

F_onewayResult(statistic=np.float64(0.0887153928808212), pvalue=np.float64(0.9857265084083967))