# Math for Data Science
## Author - Min Set Khant
## Date - 7th September 2025


In [2]:
# math_for_data_science_with_sales.py


import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
import sympy as sp

**First, let's create a sample sales dataset for analysis.**

In [3]:
# Build a reproducible 30-day synthetic sales table.
np.random.seed(42)  # fixed seed so every run produces the same numbers

dates = pd.date_range("2025-01-01", periods=30, freq="D")

# The three draws below consume the global random stream in this order
# (sales, then units, then prices); reordering them would change the data.
sales = np.random.randint(low=100, high=500, size=30)
units = np.random.randint(low=1, high=20, size=30)
prices = np.random.uniform(low=10, high=50, size=30)

# Column order: Date, Units_Sold, Price_per_Unit, Total_Sales.
sales_data = pd.DataFrame(
    dict(
        Date=dates,
        Units_Sold=units,
        Price_per_Unit=prices,
        Total_Sales=sales,
    )
)

sales_data.head()



Unnamed: 0,Date,Units_Sold,Price_per_Unit,Total_Sales
0,2025-01-01,17,40.214456,202
1,2025-01-02,10,27.006235,448
2,2025-01-03,16,18.317667,370
3,2025-01-04,15,32.708013,206
4,2025-01-05,15,11.252532,171


In [4]:
# Summary statistics of the sales dataset: describe() reports count, mean,
# min, quartiles, max and std for each column (it does not list column dtypes).

sales_data.describe()

Unnamed: 0,Date,Units_Sold,Price_per_Unit,Total_Sales
count,30,30.0,30.0,30.0
mean,2025-01-15 12:00:00,11.333333,29.716509,308.433333
min,2025-01-01 00:00:00,2.0,10.563193,120.0
25%,2025-01-08 06:00:00,7.0,21.76738,209.75
50%,2025-01-15 12:00:00,12.0,29.411768,302.5
75%,2025-01-22 18:00:00,15.75,38.931579,404.25
max,2025-01-30 00:00:00,19.0,48.610212,485.0
std,,5.479324,11.209982,109.450881


**Basic Statistics for Exploratory Data Analysis**

In [5]:
# 1. Basic Statistics
def basic_statistics(df):
    """Print descriptive statistics for the sales DataFrame.

    Emits, in order: the ``describe()`` summary of ``df``, the mean of
    ``Total_Sales``, the median of ``Units_Sold`` and the mode of
    ``Price_per_Unit``.

    Args:
        df: DataFrame providing the ``Total_Sales``, ``Units_Sold`` and
            ``Price_per_Unit`` columns.
    """
    print(df.describe())

    # Each statistic is computed lazily, right before it is printed, so the
    # lines appear in the listed order even if a later computation fails.
    # NOTE(review): Price_per_Unit looks continuous; if every value is unique
    # the reported mode carries little meaning -- confirm this is intended.
    report = (
        ("Mean Sales:", lambda: np.mean(df['Total_Sales'])),
        ("Median Units Sold:", lambda: np.median(df['Units_Sold'])),
        # keepdims=True keeps the result array-shaped, hence the [0] lookup.
        ("Mode Price:",
         lambda: stats.mode(df['Price_per_Unit'], keepdims=True).mode[0]),
    )
    for label, compute in report:
        print(label, compute())

In [6]:
# 2. Probability Functions
def probability_functions(df):
    """Print the normal PDF and CDF evaluated at the mean of ``Total_Sales``.

    A normal distribution is parameterised with the sample mean and the
    sample standard deviation (``ddof=1``) of ``df['Total_Sales']``, then its
    density and cumulative probability at the mean are printed.

    Args:
        df: DataFrame providing a numeric ``Total_Sales`` column.

    Notes:
        * If the standard deviation is zero (all values equal) the
          distribution is degenerate and a message is printed instead.
        * If the standard deviation is not finite (fewer than two
          observations make the ``ddof=1`` estimate NaN) a separate message
          is printed; previously this case was misreported as "zero".
    """
    sales = df['Total_Sales']
    mean_sales = np.mean(sales)
    std_sales = np.std(sales, ddof=1)  # sample std

    # NaN compares False against 0, so it must be caught before the > 0 test.
    if not np.isfinite(std_sales):
        print("Standard deviation is undefined "
              "(need at least two observations). PDF/CDF undefined.")
        return

    if std_sales > 0:
        print("Normal PDF for mean sales:", stats.norm.pdf(mean_sales, mean_sales, std_sales))
        print("Normal CDF for mean sales:", stats.norm.cdf(mean_sales, mean_sales, std_sales))
    else:
        print("Standard deviation is zero. PDF/CDF undefined.")

In [7]:
# 3. Correlation & Covariance
def correlation_covariance(df):
    """Print the correlation matrix, then the covariance matrix, of ``df``.

    Both matrices are computed with ``numeric_only=True``, so non-numeric
    columns are left out.

    Args:
        df: DataFrame whose numeric columns are compared pairwise.
    """
    sections = (
        ("Correlation Matrix:\n", "corr"),
        ("Covariance Matrix:\n", "cov"),
    )
    for heading, method_name in sections:
        matrix = getattr(df, method_name)(numeric_only=True)
        print(heading, matrix)

In [8]:
# 4. Hypothesis Testing
def hypothesis_testing(df, popmean=250):
    """Run a one-sample t-test of ``Total_Sales`` against a reference mean.

    Args:
        df: DataFrame providing a numeric ``Total_Sales`` column.
        popmean: Hypothesised population mean to test against. Defaults to
            250, the value this function previously hard-coded. Testing
            against the sample's own mean is pointless because it always
            yields t = 0.

    Prints:
        The t statistic and the two-sided p-value, labelled with the
        reference mean that was used.
    """
    t_stat, p_val = stats.ttest_1samp(df['Total_Sales'], popmean=popmean)
    print(f"One-sample t-test vs {popmean}:", t_stat, p_val)

In [9]:
# 5. Linear Algebra Example

def linear_algebra():
    """Print the matrix product of two fixed 2x2 integer matrices."""
    left = np.array([[1, 2], [3, 4]])
    right = np.array([[5, 6], [7, 8]])
    # For 2-D operands the @ operator is the same matrix product as np.dot.
    product = left @ right
    print("Matrix Multiplication A*B:\n", product)


In [10]:
# 6. Calculus Example

def calculus_example():
    """Symbolically differentiate and integrate x**2 + 3*x + 5, then print both.

    Both results are computed before anything is printed; the derivative is
    printed first, followed by the indefinite integral.
    """
    x = sp.Symbol('x')
    polynomial = x**2 + 3*x + 5

    # Both operations are taken with respect to x.
    results = [operation(polynomial, x) for operation in (sp.diff, sp.integrate)]

    labels = ("Derivative of x^2+3x+5:", "Integral of x^2+3x+5:")
    for label, expression in zip(labels, results):
        print(label, expression)


In [12]:
# 7. Regression Example

def regression_example(df, units=10):
    """Fit ``Total_Sales ~ Units_Sold`` by least squares and print the fit.

    Prints the fitted slope and intercept, followed by the predicted total
    sales for ``units`` units sold.

    Args:
        df: DataFrame providing ``Units_Sold`` and ``Total_Sales`` columns.
        units: Number of units to predict sales for. Defaults to 10, the
            value this function previously hard-coded.
    """
    X = df[['Units_Sold']]
    y = df['Total_Sales']
    model = LinearRegression()
    model.fit(X, y)
    print("Slope:", model.coef_[0])
    print("Intercept:", model.intercept_)

    # The model was fitted on a DataFrame, so it remembers the feature name.
    # Predicting with a bare ndarray makes scikit-learn warn that the input
    # has no valid feature names; a one-row DataFrame with the same column
    # keeps fit and predict inputs consistent.
    query = pd.DataFrame({'Units_Sold': [units]})
    prediction = model.predict(query)
    print(f"Predict Sales for {units} Units:", prediction[0])

# Visualization Examples


In [13]:
def visualization_examples(df):
    """Draw four exploratory plots of the sales data, one after another.

    In order: a histogram with KDE of ``Total_Sales``, a boxplot of
    ``Units_Sold``, a scatter plot of ``Units_Sold`` against ``Total_Sales``
    and an annotated heatmap of the numeric correlation matrix. Each plot is
    titled and shown before the next one is drawn.

    Args:
        df: DataFrame providing ``Total_Sales`` and ``Units_Sold`` columns.
    """
    def _title_and_show(title):
        # Label the plot that was just drawn, then display it.
        plt.title(title)
        plt.show()

    sns.histplot(df['Total_Sales'], kde=True)
    _title_and_show("Total Sales Distribution")

    sns.boxplot(x=df['Units_Sold'])
    _title_and_show("Units Sold Boxplot")

    sns.scatterplot(x='Units_Sold', y='Total_Sales', data=df)
    _title_and_show("Units Sold vs Total Sales")

    correlations = df.corr(numeric_only=True)
    sns.heatmap(correlations, annot=True, cmap='coolwarm')
    _title_and_show("Correlation Heatmap")

    

# PCA 

In [14]:
def pca_example(df):
    """Project three sales columns onto two principal components and print five rows.

    Args:
        df: DataFrame providing ``Units_Sold``, ``Price_per_Unit`` and
            ``Total_Sales`` columns.
    """
    feature_columns = ['Units_Sold', 'Price_per_Unit', 'Total_Sales']
    feature_matrix = df[feature_columns]

    # NOTE(review): the columns are passed to PCA without any scaling step --
    # confirm that is intended, since they are on different numeric ranges.
    reducer = PCA(n_components=2)
    projected = reducer.fit_transform(feature_matrix)

    print("PCA Components (first 5 rows):\n", projected[:5])