## Descriptive Statistics:
Descriptive statistics deals with the collection, summarization, and presentation of data.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Build a small demo frame: six sequential ids and six random integer ages in [18, 31).
# A dedicated, seeded generator keeps the ages identical on every "Restart & Run All"
# and leaves NumPy's global random state untouched.
age_rng = np.random.RandomState(42)
df1 = pd.DataFrame(dict(id=range(6), age=age_rng.randint(18, 31, size=6)))
df1

## Mean

In [None]:
# Arithmetic mean of every column (reduce down the rows).
df1.mean(axis=0)

In [None]:
# Mean of the "age" column only.
df1["age"].mean()

In [None]:
# Mean of the "id" column only.
df1["id"].mean()

## Median

In [None]:
# Median of every column (reduce down the rows).
df1.median(axis=0)

In [None]:
# Median of the "age" column only.
df1["age"].median()

In [None]:
# Median of the "id" column only.
df1["id"].median()

## Mode

In [None]:
# NOTE(review): `mode` from the standard library is imported here but is not called
# in any cell visible in this section; the call below is pandas' DataFrame.mode().
from statistics import mode

# Mode(s) of each column of the frame.
df1.mode()

In [None]:
# Mode(s) of the "age" column only.
df1["age"].mode()

In [None]:
# Mode(s) of the "id" column only.
df1["id"].mode()

## Range

In [None]:
# Per-column range (max - min).
# The DataFrame methods are used instead of np.max/np.min: NumPy forwards axis=None
# to pandas, and pandas >= 2.0 then reduces over both axes and yields a single scalar
# for the whole frame rather than one range per column.
df1.max() - df1.min()

In [None]:
# Range of the "age" column: largest value minus smallest value.
df1["age"].max() - df1["age"].min()

In [None]:
# Range of the "id" column: largest value minus smallest value.
df1["id"].max() - df1["id"].min()

In [None]:
# Box plot of the "age" column; the matplotlib Axes object is returned.
df1.boxplot(return_type="axes", column="age")

## Variance

In [None]:
# Per-column population variance.
# ddof=0 reproduces what np.var computes (pandas' own default is ddof=1).
# Calling the DataFrame method directly avoids np.var forwarding axis=None, which
# pandas deprecates for DataFrame.var and will later reduce to a single scalar.
df1.var(ddof=0)

In [None]:
# Population variance (ddof=0, as np.var uses) of the "age" column.
df1["age"].var(ddof=0)

In [None]:
# Population variance (ddof=0, as np.var uses) of the "id" column.
df1["id"].var(ddof=0)

## Standard Deviation

In [None]:
# Per-column population standard deviation.
# ddof=0 reproduces what np.std computes (pandas' own default is ddof=1).
# Calling the DataFrame method directly avoids np.std forwarding axis=None, which
# pandas deprecates for DataFrame.std and will later reduce to a single scalar.
df1.std(ddof=0)

In [None]:
# Population standard deviation (ddof=0, as np.std uses) of the "age" column.
df1["age"].std(ddof=0)

In [None]:
# Population standard deviation (ddof=0, as np.std uses) of the "id" column.
df1["id"].std(ddof=0)

## Skewness and Kurtosis
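Skewness: measures the asymmetry of the distribution around its mean. It is negative when the left tail is longer (left-skewed) and positive when the right tail is longer (right-skewed).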

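Kurtosis: measures how heavy the tails (and how sharp the peak) of the distribution are compared with a normal bell curve. Positive values indicate heavier tails and a sharper peak; negative values indicate lighter tails and a flatter peak.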

In [None]:
# Skewness of every column (reduce down the rows).
df1.skew(axis=0)

In [None]:
# Kurtosis of every column (`kurtosis` is the long-form alias of `kurt`).
df1.kurtosis()

### Inferential Statistics
Inferential statistical analysis infers properties of a population, for example by testing hypotheses and deriving estimates

In [None]:
# Seed first so that the population itself, not only the resampling below,
# is reproducible on a fresh kernel.
np.random.seed(10)

# Synthetic population: 1000 random integers in [10, 20).
population = np.random.randint(10, 20, 1000)

estimates = []  # Empty list to hold point estimates

for x in range(200):  # Generate 200 samples, each with 100 sampled values
    sample = np.random.choice(a=population, size=100)
    estimates.append(sample.mean())  # Keep the sample mean in the list

In [None]:
# True mean of the whole population, for comparison with the point estimates.
population.mean()

In [None]:
# Density plot of the sample means (the sampling distribution of the mean).
pd.DataFrame(estimates).plot.density()

## Point Estimation

### Confidence Interval
Confidence Interval = (sample_mean - margin_of_error, sample_mean + margin_of_error)

To calculate margin_of_error we need to calculate the critical_value first

In [None]:
import scipy.stats as stats

# Standard-normal quantile at 0.975 via the percent point function (inverse CDF).
z_critical = stats.norm.ppf(0.975)

In [None]:
# Student-t quantile at 0.975 with 24 degrees of freedom
# (degrees of freedom = sample size minus 1).
t_critical = stats.t.ppf(0.975, 24)

In [None]:
# Margin of error = critical value * standard error.
# The standard error divides the spread of the point estimates by sqrt(n), where n is
# the number of estimates; len(estimates) keeps n in sync with the sampling cell
# instead of repeating its hard-coded count.
margin_of_error = z_critical * (np.std(estimates) / np.sqrt(len(estimates)))

In [None]:
# Lower limit of the confidence interval: sample_mean - margin_of_error
np.mean(estimates) - margin_of_error

In [None]:
# Upper limit of the confidence interval: sample_mean + margin_of_error
np.mean(estimates) + margin_of_error