All libraries used in this analysis are imported in the following code block.

In [None]:
import scipy.stats as stats
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import matplotlib.pyplot as plt

Below are some useful definitions that will be used later in the analysis of the data.

In [None]:

def t_test(group1, group2):
    """Run an independent two-sample t-test and report the outcome.

    The test is delegated to ``scipy.stats.ttest_ind`` with its default
    settings. The statistic, the p-value and a one-line verdict at the
    0.05 significance level are printed.

    Args:
        group1: Observations of the first sample.
        group2: Observations of the second sample.

    Returns:
        A ``(t_statistic, p_value)`` tuple as produced by ``ttest_ind``.
    """
    significance_level = 0.05

    t_stat, p_val = stats.ttest_ind(group1, group2)

    print("T-statistic:", t_stat)
    print("P-value:", p_val)

    # Keep the comparison as "p < alpha" so that a NaN p-value lands in the
    # "fail to reject" branch.
    verdict = (
        "Reject the null hypothesis: There is a significant difference between the groups."
        if p_val < significance_level
        else "Fail to reject the null hypothesis: There is no significant difference between the groups."
    )
    print(verdict)

    return t_stat, p_val



def linear_regression(data, features, target):
    """Fit an ordinary least-squares model and report its parameters.

    The model is fitted on ``data[features]`` against ``data[target]`` and
    scored on that same data, so the reported R-squared is an in-sample
    value. Coefficients, intercept and R-squared are printed.

    Args:
        data: Container indexable by ``features`` and ``target``
            (presumably a pandas DataFrame -- confirm against callers).
        features: Key(s) selecting the predictor columns.
        target: Key selecting the response column.

    Returns:
        A ``(coefficients, intercept, r_squared)`` tuple.
    """
    predictors = data[features]
    response = data[target]

    # fit() returns the estimator itself, so construction and fitting chain.
    fitted = LinearRegression().fit(predictors, response)

    coefficients, intercept = fitted.coef_, fitted.intercept_
    r_squared = fitted.score(predictors, response)

    for heading, value in (
        ("Coefficients", coefficients),
        ("Intercept", intercept),
        ("R-Squared", r_squared),
    ):
        print(heading, value)

    return coefficients, intercept, r_squared

def logistic_regression(dataset, features, target, test_size=0.2, random_state=42):
    """Fit a logistic regression on a train split and report its confusion matrix.

    ``dataset`` is split into training and test portions, a default
    ``LogisticRegression`` is fitted on the training portion, and the
    confusion matrix of its predictions on the test portion is printed.

    Args:
        dataset: Container indexable by ``features`` and ``target``
            (presumably a pandas DataFrame -- confirm against callers).
        features: Key(s) selecting the predictor columns.
        target: Key selecting the class-label column.
        test_size: Share of the rows held out for testing; forwarded to
            ``train_test_split``.
        random_state: Seed forwarded to ``train_test_split``. Note: the
            analysis always uses the default of 42.

    Returns:
        The confusion matrix computed from the held-out test rows.
    """
    # Read from the `dataset` argument. The previous body referenced an
    # undefined/global name `data`, which ignored the value passed in.
    X = dataset[features]
    y = dataset[target]

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit on the training rows only, then predict the held-out rows
    model = LogisticRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Evaluate the model
    confusion = confusion_matrix(y_test, y_pred)

    # Print the results
    print("Confusion Matrix", confusion)

    return confusion

Below are some definitions for basic visuals that can be used.

In [None]:
def stacked_bar_chart(data, categories, category_labels, colors=None, title=None, xlabel=None, ylabel=None, legend_labels=None):
    """Draw a stacked bar chart on a new figure and return that figure.

    One layer is drawn per entry in ``categories``; layer ``i`` takes its
    heights from ``data[i]`` and is stacked on the running sum of the layers
    below it.

    Args:
        data: Indexable of layers; ``data[i][j]`` is the height of layer
            ``i`` at bar ``j``.
        categories: Only its length is used, to decide how many layers to draw.
        category_labels: x positions/labels handed to ``ax.bar``.
        colors: Per-layer colors; defaults to the first entries of
            matplotlib's ``Paired`` colormap.
        title: Optional axes title.
        xlabel: Optional x-axis label.
        ylabel: Optional y-axis label.
        legend_labels: Optional per-layer legend entries; a legend is shown
            only when this is provided and non-empty.

    Returns:
        The matplotlib figure holding the chart.
    """
    figure, axes = plt.subplots()

    layer_count = len(categories)

    # Fall back to the Paired colormap when the caller gives no colors.
    if colors is None:
        colors = plt.cm.Paired(range(layer_count))

    # Height already occupied at each bar position; starts at zero.
    stacked_height = [0] * len(data[0])

    for idx in range(layer_count):
        layer = data[idx]
        if legend_labels:
            layer_label = legend_labels[idx]
        else:
            layer_label = None

        axes.bar(category_labels, layer, bottom=stacked_height, label=layer_label, color=colors[idx])

        # Raise the baseline by this layer so the next one sits on top.
        stacked_height = [stacked_height[pos] + layer[pos] for pos in range(len(layer))]

    # Apply only the text options that were supplied.
    for text, apply_text in (
        (title, axes.set_title),
        (xlabel, axes.set_xlabel),
        (ylabel, axes.set_ylabel),
    ):
        if text:
            apply_text(text)

    if legend_labels:
        axes.legend()

    return figure

def histogram(data, bins=10, color='blue', title=None, xlabel=None, ylabel=None):
    """Plot a histogram of ``data`` on a new figure and return that figure.

    Args:
        data: Values forwarded to ``ax.hist``.
        bins: Bin specification forwarded to ``ax.hist``.
        color: Bar color.
        title: Optional axes title.
        xlabel: Optional x-axis label.
        ylabel: Optional y-axis label.

    Returns:
        The matplotlib figure holding the histogram.
    """
    figure, axes = plt.subplots()
    axes.hist(data, bins=bins, color=color)

    # Title and axis labels are set only when a non-empty value is given.
    for text, apply_text in (
        (title, axes.set_title),
        (xlabel, axes.set_xlabel),
        (ylabel, axes.set_ylabel),
    ):
        if text:
            apply_text(text)

    return figure

def create_scatterplot(x, y, color='blue', title=None, xlabel=None, ylabel=None):
    """Plot ``y`` against ``x`` as a scatterplot on a new figure.

    Args:
        x: Horizontal coordinates forwarded to ``ax.scatter``.
        y: Vertical coordinates forwarded to ``ax.scatter``.
        color: Marker color.
        title: Optional axes title.
        xlabel: Optional x-axis label.
        ylabel: Optional y-axis label.

    Returns:
        The matplotlib figure holding the scatterplot.
    """
    figure, axes = plt.subplots()
    axes.scatter(x, y, color=color)

    # Pair each optional text with the setter that applies it; falsy
    # values (None or empty string) are skipped.
    decorations = [
        (axes.set_title, title),
        (axes.set_xlabel, xlabel),
        (axes.set_ylabel, ylabel),
    ]
    for apply_text, text in decorations:
        if not text:
            continue
        apply_text(text)

    return figure