In [1]:
# Import the libraries
import numpy as np
import pandas as pd


In [2]:
#Read the dataset into notebook
df = pd.read_csv("Sales_add.csv")
df.head(10)

Unnamed: 0,Month,Region,Manager,Sales_before_digital_add(in $),Sales_After_digital_add(in $)
0,Month-1,Region - A,Manager - A,132921,270390
1,Month-2,Region - A,Manager - C,149559,223334
2,Month-3,Region - B,Manager - A,146278,244243
3,Month-4,Region - B,Manager - B,152167,231808
4,Month-5,Region - C,Manager - B,159525,258402
5,Month-6,Region - A,Manager - B,137163,256948
6,Month-7,Region - C,Manager - C,130625,222106
7,Month-8,Region - A,Manager - A,131140,230637
8,Month-9,Region - B,Manager - C,171259,226261
9,Month-10,Region - C,Manager - B,141956,193735


In [3]:
# To Check the null Values
df.isna().sum()

Month                             0
Region                            0
Manager                           0
Sales_before_digital_add(in $)    0
Sales_After_digital_add(in $)     0
dtype: int64

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 5 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   Month                           22 non-null     object
 1   Region                          22 non-null     object
 2   Manager                         22 non-null     object
 3   Sales_before_digital_add(in $)  22 non-null     int64 
 4   Sales_After_digital_add(in $)   22 non-null     int64 
dtypes: int64(2), object(3)
memory usage: 1008.0+ bytes


In [20]:
df.describe()

Unnamed: 0,Sales_before_digital_add(in $),Sales_After_digital_add(in $)
count,22.0,22.0
mean,149239.954545,231123.727273
std,14844.042921,25556.777061
min,130263.0,187305.0
25%,138087.75,214960.75
50%,147444.0,229986.5
75%,157627.5,250909.0
max,178939.0,276279.0


### ● The company wishes to clarify whether there is any increase in sales after stepping into digital marketing.

In [5]:
# Here we have a sales_add dataset contain sales(in $) after and before digital marketing.
# We want to test there is any increase in sales after stepping into digital marketing.
# For this we can use paired sample t-test or dependent sample t- test.


In [6]:
# Setting the hyothesis

    Ho: Sales before and after digital marketing are same
    H1: Sales incresed after digital marketing

In [7]:
# sales(in $) before digital marketing
before_digital = df['Sales_before_digital_add(in $)']
before_digital

0     132921
1     149559
2     146278
3     152167
4     159525
5     137163
6     130625
7     131140
8     171259
9     141956
10    159339
11    178939
12    145062
13    151514
14    147463
15    177195
16    140862
17    167996
18    132135
19    152493
20    147425
21    130263
Name: Sales_before_digital_add(in $), dtype: int64

In [8]:
# # sales(in $) after digital marketing
after_digital = df['Sales_After_digital_add(in $)']
after_digital

0     270390
1     223334
2     244243
3     231808
4     258402
5     256948
6     222106
7     230637
8     226261
9     193735
10    203175
11    276279
12    205180
13    253131
14    229336
15    187305
16    234908
17    191517
18    227040
19    212579
20    263388
21    243020
Name: Sales_After_digital_add(in $), dtype: int64

In [9]:
from scipy import stats # importing stats package

In [10]:
# Two sample paired t - test
stats.ttest_rel(before_digital,after_digital )

Ttest_relResult(statistic=-12.09070525287017, pvalue=6.336667004575778e-11)

In [11]:
pvalue = 6.336667004575778e-11
print('pvalue =',pvalue)
if pvalue < 0.05:
    print('Reject the null hypothesis(Ho)')
else:
    print('Accept null hypothesis(Ho)')

pvalue = 6.336667004575778e-11
Reject the null hypothesis(Ho)


Conclusion :
     Here pvalue is less than 0.05(5%),so we can reject the null hypothesis and accept alternative H1, This means the sales increased after stepping into digital marketing.


### ● The company needs to check whether there is any dependency between the features “Region” and “Manager”.

In [12]:
 # we want to check whether there is any dependency between the features Region and manager.
 # For this we can use chi-square test for independence.

In [13]:
# Count for the three categories of 'Region'
df['Region'].value_counts()

Region - A    10
Region - B     7
Region - C     5
Name: Region, dtype: int64

In [14]:
# Count for the three categories of 'Manager'
df['Manager'].value_counts()

Manager - A    9
Manager - B    7
Manager - C    6
Name: Manager, dtype: int64

In [15]:
# Setting the null and Alternative Hypothesis

    Ho:There is no significant relationship between the features 'Region' and 'Manager'.
    H1:There is significant relationship(dependency) between the features 'Region' and 'Manager'.

In [16]:
# Chi-square test of independence with SCIPY.STATS
# This method requires one to pass a contingency table, this can be accomplished using pandas.crosstab

In [17]:
crosstab = pd.crosstab(df["Manager"],df["Region"],margins = True)
crosstab

Region,Region - A,Region - B,Region - C,All
Manager,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Manager - A,4,4,1,9
Manager - B,3,1,3,7
Manager - C,3,2,1,6
All,10,7,5,22


In [18]:
stats.chi2_contingency(crosstab)

(3.050566893424036,
 0.962256341757093,
 9,
 array([[ 4.09090909,  2.86363636,  2.04545455,  9.        ],
        [ 3.18181818,  2.22727273,  1.59090909,  7.        ],
        [ 2.72727273,  1.90909091,  1.36363636,  6.        ],
        [10.        ,  7.        ,  5.        , 22.        ]]))

    Chi-Square Test Statistic: 3.05056
    p-value: 0.9622
    Degrees of freedom: 9 (calculated as #rows-1 * #columns-1)
    Array: The last array displays the expected values for each cell in the contingency table.


In [19]:
pvalue = 0.962256
print('pvalue =',pvalue)
if pvalue < 0.05:
    print('Reject the null hypothesis(Ho)')
else:
    print('Accept null hypothesis(Ho)')

pvalue = 0.962256
Accept null hypothesis(Ho)


That means,there is no significant relationship between features "Regions" and "Manager".so,there is no any dependency between the features.