In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind, chi2_contingency, pearsonr

# Read the pre-cleaned AI tools table into `df`; the path is relative to the
# directory the notebook is run from.
df = pd.read_csv(filepath_or_buffer="AI_tools_cleaned_dataset.csv")

In [2]:
# FEATURE CREATION
# Adds two derived columns to `df`: Tool_Age and Website_Active.

# Tool_Age: years elapsed between the founding year and the reference year.
# The reference year is fixed at 2025 here rather than read from the clock.
current_year = 2025
founded_year = df['Year Founded']
df['Tool_Age'] = current_year - founded_year

# Website_Active: 1 where 'Website Status' is exactly 'Active', 0 for every
# other value (a missing status therefore also becomes 0).
is_active_status = df['Website Status'] == 'Active'
df['Website_Active'] = is_active_status.astype('int64')

In [3]:
# HYPOTHESIS TEST 1
# Two-sample t-test on Tool_Age, with tools grouped by the Website_Active flag.
# H0: No difference in tool age between Active and Inactive websites

# Pull the age column for each flag value using label-based selection.
active_age = df.loc[df['Website_Active'] == 1, 'Tool_Age']
inactive_age = df.loc[df['Website_Active'] == 0, 'Tool_Age']

# nan_policy='omit' tells the test to ignore missing ages instead of
# propagating NaN into the statistic.
t_stat, p_value = ttest_ind(
    active_age,
    inactive_age,
    nan_policy='omit',
)

print("\nT-Test: Website Status vs Tool Age")
print("T-statistic:", round(t_stat, 4))
print("P-value:", p_value)

# Decision at the 5% significance level.
print(
    "Result: Significant difference (Reject H0)"
    if p_value < 0.05
    else "Result: No significant difference (Fail to Reject H0)"
)


T-Test: Website Status vs Tool Age
T-statistic: -4.2381
P-value: 2.2666770383163806e-05
Result: Significant difference (Reject H0)


In [4]:
# HYPOTHESIS TEST 2
# Chi-square test of independence: Category vs Website Status
# H0: Category and Website Status are independent

# Observed counts: one row per Category value, one column per Website Status value.
contingency = pd.crosstab(df['Category'], df['Website Status'])

# `expected` holds the cell counts implied by independence (same shape as the table).
chi2, p, dof, expected = chi2_contingency(contingency)

print("\nChi-Square Test: Category vs Website Status")
print("Chi2:", round(chi2, 4))
print("P-value:", p)

if p < 0.05:
    print("Result: Significant Association")
else:
    print("Result: No Significant Association")

# The chi-square p-value is an approximation; a commonly quoted guideline is
# that it should only be trusted when expected counts are at least 5 in each
# cell. Flag sparse tables so the verdict above is not over-interpreted.
low_expected_cells = int((expected < 5).sum())
if low_expected_cells:
    print(
        f"Warning: {low_expected_cells} of {expected.size} cells have an expected "
        "count below 5; the chi-square approximation may be unreliable."
    )


Chi-Square Test: Category vs Website Status
Chi2: 374.4625
P-value: 0.5272355268193485
Result: No Significant Association


### CORRELATION ANALYSIS 

In [5]:
# Pearson correlation between Tool_Age and the 0/1 Website_Active flag.
# Because one variable is binary this is a point-biserial correlation, so its
# p-value is expected to coincide with an equal-variance two-sample t-test on
# the same groups.

# pearsonr is given no NaN handling, so keep only rows where both values are
# present. This mirrors the nan_policy='omit' used for the t-test and leaves
# the result unchanged when nothing is missing.
corr_pairs = df[['Tool_Age', 'Website_Active']].dropna()

corr, corr_p = pearsonr(corr_pairs['Tool_Age'], corr_pairs['Website_Active'])

print("\nPearson Correlation: Tool Age vs Website Active")
print("Correlation Coefficient:", round(corr, 4))
print("P-value:", corr_p)

# Significance only says the coefficient differs from zero; read the
# coefficient itself to judge how strong the relationship is.
if corr_p < 0.05:
    print("Result: Statistically Significant Correlation")
else:
    print("Result: No Significant Correlation")


Pearson Correlation: Tool Age vs Website Active
Correlation Coefficient: -0.0327
P-value: 2.2666770381574484e-05
Result: Statistically Significant Correlation
