In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.stats import ttest_ind
from scipy.stats import chi2

In [2]:
data= pd.read_csv('Sales_add.csv')
data

Unnamed: 0,Month,Region,Manager,Sales_before_digital_add(in $),Sales_After_digital_add(in $)
0,Month-1,Region - A,Manager - A,132921,270390
1,Month-2,Region - A,Manager - C,149559,223334
2,Month-3,Region - B,Manager - A,146278,244243
3,Month-4,Region - B,Manager - B,152167,231808
4,Month-5,Region - C,Manager - B,159525,258402
5,Month-6,Region - A,Manager - B,137163,256948
6,Month-7,Region - C,Manager - C,130625,222106
7,Month-8,Region - A,Manager - A,131140,230637
8,Month-9,Region - B,Manager - C,171259,226261
9,Month-10,Region - C,Manager - B,141956,193735


In [3]:
data.isnull().sum()

Month                             0
Region                            0
Manager                           0
Sales_before_digital_add(in $)    0
Sales_After_digital_add(in $)     0
dtype: int64

#### 1. The company wishes to clarify whether there is any increase in sales after stepping into digital marketing.

H0: There is no significant increase in sales after digital marketing

Ha: There is significant increase in sales after digital marketing

In [4]:
sales_before_digital_marketing = data['Sales_before_digital_add(in $)']
sales_after_digital_marketing = data['Sales_After_digital_add(in $)']

t_value, p_value = stats.ttest_rel(sales_after_digital_marketing, sales_before_digital_marketing, alternative='greater')
print("t-value: ", round(t_value,6))
print("p-value: ", round(p_value,6))

stats.ttest_rel(sales_after_digital_marketing,sales_before_digital_marketing,alternative='greater')

t-value:  12.090705
p-value:  0.0


Ttest_relResult(statistic=12.09070525287017, pvalue=3.168333502287889e-11)

#### Inference

Hypothesis test gives a p-value as 0 and t-value as 12.0907. p-value is less than alpha level(0.05), therefore null hypothesis is rejected.

Conclusion: There is an increase in sales after stepping into digital marketing.

#### 2. The company needs to check whether there is any dependency between the features “Region” and “Manager”.

H0 : There is no dependency between the features Region and Manager

Ha : There is dependency between the features Region and Manager

In [5]:
data_1 = pd.crosstab(data['Region'],data['Manager'])
data_1

Manager,Manager - A,Manager - B,Manager - C
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Region - A,4,3,3
Region - B,4,1,2
Region - C,1,3,1


In [6]:
data_1.values

array([[4, 3, 3],
       [4, 1, 2],
       [1, 3, 1]])

In [7]:
#Observed values
observed_values = data_1.values
print('Observed Values: ', observed_values)


Observed Values:  [[4 3 3]
 [4 1 2]
 [1 3 1]]


In [8]:
value = stats.chi2_contingency(data_1)
value

(3.050566893424036,
 0.5493991051158094,
 4,
 array([[4.09090909, 3.18181818, 2.72727273],
        [2.86363636, 2.22727273, 1.90909091],
        [2.04545455, 1.59090909, 1.36363636]]))

In [9]:
#Expected values
expected_values = value[3]
print('Expected values: ', expected_values)

Expected values:  [[4.09090909 3.18181818 2.72727273]
 [2.86363636 2.22727273 1.90909091]
 [2.04545455 1.59090909 1.36363636]]


In [10]:
#Finding degrees of freedom
alpha= 0.05
no_of_rows = len(data_1.iloc[:,:])
no_of_columns = len(data_1.iloc[:,:])
degree_of_freedom = (no_of_rows-1) *(no_of_columns-1)

In [11]:
chi_square = sum([(o-e)**2/e for o,e in zip(observed_values,expected_values)])
chi_square
chi_square_statistics = chi_square[0]+chi_square[1]+chi_square[2]
print('Chi-square statistics: ', chi_square_statistics)

Chi-square statistics:  3.0505668934240364


In [12]:
#p-value
p_value = 1-chi2.cdf(x=chi_square_statistics,df = degree_of_freedom)
print('p-value: ', p_value)
print('Significance level: ', alpha)
print('Degree of freedom: ', degree_of_freedom)

p-value:  0.5493991051158094
Significance level:  0.05
Degree of freedom:  4


In [14]:
if p_value <= alpha:
    print('There is dependency between the features Region and Manager. Rejecting null hypothesis')
else:
    print('There is no dependency between the features Region and Manager. We are failing to reject null hypothesis')

There is no dependency between the features Region and Manager. We are failing to reject null hypothesis
