In [5]:
### Math and Statistics Functions
# scipy
# statsmodels
# pandas

### Library Imports
import pandas as pd                 # For data manipulation and analysis


In [15]:
### Create the DataFrame
# Four integer columns plus a 'Date' column of ten consecutive month-end timestamps.
data = {
    'Group_A': [25, 30, 35, 40, 45, 50, 55, 60, 65, 70],
    'Group_B': [22, 27, 34, 38, 44, 48, 53, 61, 64, 68],
    'Metric_1': [10, 15, 20, 25, 30, 35, 40, 45, 50, 55],
    'Metric_2': [8, 14, 19, 24, 29, 33, 39, 43, 49, 52],
    # Month-end dates 2023-01-31 .. 2023-10-31. An explicit MonthEnd offset is used instead of
    # the 'M' string alias, which pandas 2.2+ deprecates in favour of 'ME'; the offset object
    # produces the same dates on both older and newer pandas releases.
    'Date': pd.date_range(start='2023-01-01', periods=10, freq=pd.offsets.MonthEnd())
}

df = pd.DataFrame(data)
print(df)


   Group_A  Group_B  Metric_1  Metric_2       Date
0       25       22        10         8 2023-01-31
1       30       27        15        14 2023-02-28
2       35       34        20        19 2023-03-31
3       40       38        25        24 2023-04-30
4       45       44        30        29 2023-05-31
5       50       48        35        33 2023-06-30
6       55       53        40        39 2023-07-31
7       60       61        45        43 2023-08-31
8       65       64        50        49 2023-09-30
9       70       68        55        52 2023-10-31


#### Scipy Functions

- **`scipy.stats.ttest_ind()`** - Perform a T-test for the means of two independent samples.
  - **Example:** `scipy.stats.ttest_ind(sample1, sample2)` - Tests if the means of two independent samples are significantly different.

- **`scipy.stats.linregress()`** - Perform linear regression on two sets of measurements.
  - **Example:** `slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x, y)` - Fits a least-squares regression line and returns the slope, intercept, correlation coefficient (`r_value`; square it to get R-squared), p-value, and standard error of the slope.

- **`scipy.stats.norm.ppf()`** - Calculate the percent point function (inverse of CDF) for a normal distribution.
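  - **Example:** `z_value = scipy.stats.norm.ppf(0.975)` - Calculates the z-value at the 97.5th percentile of the standard normal distribution (about 1.96), which is the critical value for a two-sided 95% confidence interval.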

- **`scipy.stats.norm.cdf()`** - Calculate the cumulative distribution function for a normal distribution.
  - **Example:** `probability = scipy.stats.norm.cdf(x_value)` - Calculates the probability that a value drawn from a normal distribution is less than or equal to `x_value`.

- **`scipy.optimize.minimize()`** - Perform minimization of a scalar function.
  - **Example:** `result = scipy.optimize.minimize(func, initial_guess)` - Minimizes a function `func` starting from an initial guess.

- **`scipy.integrate.quad()`** - Perform numerical integration of a function.
  - **Example:** `integral, error = scipy.integrate.quad(func, a, b)` - Integrates the function `func` from `a` to `b`.


In [16]:
### Independent two-sample T-test comparing the means of Group_A and Group_B
from scipy.stats import ttest_ind

# Run the test once, then unpack the result into its statistic and p-value
ttest_result = ttest_ind(df['Group_A'], df['Group_B'])
t_stat, p_value = ttest_result

print(f"T-statistic: {t_stat}, P-value: {p_value}")

T-statistic: 0.23154656767745735, P-value: 0.8195020042345589


In [17]:
### Least-squares linear regression of Metric_2 (y) on Metric_1 (x)
from scipy.stats import linregress

# Name the predictor and response explicitly before fitting
x_values = df['Metric_1']
y_values = df['Metric_2']
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)

# r_value is the correlation coefficient, so it is squared here to report R-squared
print(f"Slope: {slope}, Intercept: {intercept}, R-squared: {r_value**2}")


Slope: 0.9818181818181818, Intercept: -0.9090909090909101, R-squared: 0.9980832420591457


In [18]:
### Calculate the probability that a value drawn from a normal distribution is less than or equal to 50
from scipy.stats import norm

# Parameterise the normal distribution from Metric_1 using pandas' own reductions, so this
# cell no longer depends on the `np` alias (the visible import cell only provides `pd`).
metric_1 = df['Metric_1']
mu = metric_1.mean()
# ddof=0 is the population standard deviation, i.e. the value np.std() produced here before;
# Series.std() would otherwise default to ddof=1 and change the reported probability.
sigma = metric_1.std(ddof=0)

# Calculate cumulative distribution function value
probability = norm.cdf(50, loc=mu, scale=sigma)

print(f"Probability that a value ≤ 50: {probability}")

Probability that a value ≤ 50: 0.8884912650266925


In [21]:
### Perform numerical integration of the function f(x) = 3x^2 + e^sqrt(x) from 0 to 1
# NumPy is imported here because func() needs it and the visible import cell only provides pandas.
import numpy as np
from scipy.integrate import quad

# Define the function to integrate
def func(x):
    """Evaluate f(x) = 3x^2 + e^sqrt(x) at x."""
    return 3*(x ** 2) + np.exp(np.sqrt(x))

# Perform numerical integration
# quad returns the integral value and an estimate of the absolute error; only the value is printed.
integral, error = quad(func, 0, 1)

print(f"Integral of f(x) = 3x^2 + e^sqrt(x) from 0 to 1: {integral}")


Integral of f(x) = 3x^2 + e^sqrt(x) from 0 to 1: 3.000000000000001


#### Statsmodels Functions
- **`statsmodels.tsa.stattools.adfuller()`** - Perform the Augmented Dickey-Fuller (ADF) test for stationarity.
  - **Example:** `adf_result = statsmodels.tsa.stattools.adfuller(time_series)` - Tests if a time series is stationary.

- **`statsmodels.tsa.stattools.kpss()`** - Perform the KPSS test for stationarity.
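  - **Example:** `kpss_result = statsmodels.tsa.stattools.kpss(time_series)` - Tests the null hypothesis that the time series is stationary (the reverse of the ADF test, whose null hypothesis is that the series has a unit root).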

- **`statsmodels.tsa.seasonal.seasonal_decompose()`** - Decompose a time series into trend, seasonal, and residual components.
  - **Example:** `decomposition = statsmodels.tsa.seasonal.seasonal_decompose(time_series, model='additive')` - Decomposes the time series into its components.


In [23]:
### Augmented Dickey-Fuller (ADF) stationarity test on the Metric_2 column
from statsmodels.tsa.stattools import adfuller

# Pull the series out first, then run the ADF test on it
metric_2_series = df['Metric_2']
adf_result = adfuller(metric_2_series)

# Entries 0 and 1 of the result are reported as the test statistic and the p-value
print(f"ADF Statistic: {adf_result[0]}, P-value: {adf_result[1]}")


ADF Statistic: -1.693973863795058, P-value: 0.4342199676184899


In [None]:
### Decompose the Metric_1 time series into trend, seasonal, and residual components
from statsmodels.tsa.seasonal import seasonal_decompose

# Decompose the time series
# NOTE(review): with period=1 every "cycle" is a single observation, so the seasonal component
# is not expected to be informative. Confirm the intended period for this data.
decomposition = seasonal_decompose(df['Metric_1'], model='additive', period=1)

# Plot the decomposed components
# plot() returns the Figure. Binding it to a name keeps it from also being echoed as the cell's
# value, which would show the same chart twice under the inline matplotlib backend.
fig = decomposition.plot()


#### Pandas Functions
- **`pandas.DataFrame.corr()`** - Compute pairwise correlation of columns.
  - **Example:** `correlation_matrix = df.corr()` - Computes the correlation matrix of the DataFrame's columns.

- **`pandas.DataFrame.cov()`** - Compute pairwise covariance of columns.
  - **Example:** `covariance_matrix = df.cov()` - Computes the covariance matrix of the DataFrame's columns.

In [26]:
### Covariance and correlation of the Metric_1 column, computed against itself
# NOTE(review): both calls pair Metric_1 with itself, so the covariance is just the column's
# variance and the correlation is 1.0 by construction. Confirm whether a second column
# (e.g. Metric_2) was intended here.
metric_1 = df['Metric_1']
cov = metric_1.cov(metric_1)
corr = metric_1.corr(metric_1)

print(f'The covariance is: {cov} and the correlation is: {corr}')

The covariance is: 229.16666666666666 and the correlation is: 1.0
