In [12]:
import numpy as np
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt
%matplotlib inline

In [17]:
df=pd.read_csv('world_gdp_clean.csv')

In [29]:
def confidence_interval(sample_mean, sample_std, n, alpha):
    z = abs(norm.ppf(alpha/2))
    moe = (z*sample_std)/(n**0.5)
    return [sample_mean-moe,sample_mean+moe]

X = np.array(df['YEAR2015'],dtype='float64') # Population set
sample_size = 45
    
alpha = 0.1  # 90% CI

x = np.random.permutation(X)[:sample_size] #Sample set

mu_x = np.mean(x)
std_x = np.std(x)

print("Confidence interval:")
print("\tWe construct a 90% confidence interval for mean of a sample set taken from the year 2015.")
print("Sample mean: {0:.2e}".format(mu_x))
print("Sample standard deviation: {0:.2e}\n".format(std_x))

ci = confidence_interval(mu_x, std_x, sample_size, alpha)

print("Results:")
print("\tLower bound of mean: {0:.2e}".format(ci[0]))
print("\tUpper bound of mean: {0:.2e}\n".format(ci[1]))

Confidence interval:
	We construct a 90% confidence interval for mean of a sample set taken from the year 2015.
Sample mean: 2.83e+12
Sample standard deviation: 8.45e+12

Results:
	Lower bound of mean: 7.59e+11
	Upper bound of mean: 4.90e+12



In [26]:
def hypothesis_test(sample_mean, pop_mean, std_dev, sample_size, alpha):
    
    actual_z = abs(norm.ppf(alpha))
    hypo_z = (sample_mean - pop_mean) / (std_dev/(sample_size)**0.5)
    
    print('actual z value : {0:.2f}'.format(actual_z))
    print('hypothesis z value : {0:.2f}\n'.format(hypo_z))
    if hypo_z >= actual_z:
        return True
    else:
        return False

X = np.array(df['YEAR2006'],dtype='float64') # Population set
Y = np.array(df['YEAR2016'],dtype='float64') # Poputlation set

alpha = 0.05 # 90% rejection region

pop_size = len(X)
sample_size = 45 # Hardcoded

x = np.random.permutation(X)[:sample_size] #Sample set
y = np.random.permutation(Y)[:sample_size] #Sample set

mu_x = np.mean(x)
std_x = np.std(x)

mu_y = np.mean(y)
std_y = np.std(y)

test_mu = 2*mu_x-mu_y
test_std = ((2*std_x)**2 + std_y**2)**0.5

print('\nHypothesis testing.\n')
print('\tFollowing hypothesis is to test if the mean gdp of countries in the year 2016 is more than twice the mean gdp of the countries in the year 2006.')
print("\nMUx denotes the sample mean gdp of countries in year 2006.")
print("MUy denotes the sample mean gdp of countries in year 2016.\n")
print("Sample size =",sample_size)
print('Null hypothesis : 2*MUx - MUy <= 0')
print('Alternate hypothesis : 2*MUx - MUy > 0\n')

result = hypothesis_test(test_mu,0,test_std,sample_size,alpha)

print("\nThe result of the hypothesis test is:")
if(result==True):
    print("\tNull hypothesis is rejected. Alternate hypothesis is true")
else:
    print("\tNull hypothesis is probable.")



Hypothesis testing.

	Following hypothesis is to test if the mean gdp of countries in the year 2016 is more than twice the mean gdp of the countries in the year 2006.

MUx denotes the sample mean gdp of countries in year 2006.
MUy denotes the sample mean gdp of countries in year 2016.

Sample size = 45
Null hypothesis : 2*MUx - MUy <= 0
Alternate hypothesis : 2*MUx - MUy > 0

actual z value : 1.64
hypothesis z value : 0.54


The result of the hypothesis test is:
	Null hypothesis is probable.
