In [None]:
import os
import sys
import pandas as pd
from scipy.stats import ttest_ind
from datetime import datetime

# Set the working directory to the script's location so that the relative
# path 'wetter.csv' resolves next to this file.
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is not defined when the code runs in an interactive session
    # or a notebook cell; fall back to the current working directory.
    script_dir = os.getcwd()
os.chdir(script_dir)

# Load the CSV file; exit with status 1 if it does not exist.
try:
    df = pd.read_csv('wetter.csv')
except FileNotFoundError:
    print("Error: The file 'wetter.csv' was not found in the current directory.")
    sys.exit(1)

# Parse the first column as '%Y-%m-%d' dates and derive a 'month' column.
# Rows whose date cannot be parsed get NaT / None and are counted.
failed_dates = 0
dates = []
months = []

for date_str in df.iloc[:, 0]:
    try:
        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
    except (ValueError, TypeError):
        # ValueError: the text does not match the expected format.
        # TypeError: the cell is not a string at all (e.g. a missing value
        # that was loaded as NaN), which strptime rejects outright.
        print(f"Warning: Failed to parse date '{date_str}'. Using NaT instead.")
        dates.append(pd.NaT)
        months.append(None)
        failed_dates += 1
    else:
        dates.append(date_obj)
        months.append(date_obj.month)

df['date'] = dates
df['month'] = months

# Report failed date parses
if failed_dates > 0:
    print(f"Warning: {failed_dates} dates failed to parse.")

# Get the temperature column (3rd column, index 2)
temperatures = df.iloc[:, 2]

# Compute statistics. Missing temperatures are dropped from the monthly
# samples so the t-test sees the same values that .mean() averages
# (.mean() skips NaN, whereas ttest_ind would propagate it).
overall_avg_temp = temperatures.mean()
july_temps = temperatures[df['month'] == 7].dropna()
may_temps = temperatures[df['month'] == 5].dropna()

july_avg_temp = july_temps.mean()
may_avg_temp = may_temps.mean()

# Perform Welch's t-test (unequal variances). The test needs at least two
# observations per group to estimate a variance; otherwise the result is
# undefined, so skip the call and report NaN.
if len(july_temps) >= 2 and len(may_temps) >= 2:
    t_stat, p_value = ttest_ind(july_temps, may_temps, equal_var=False)
else:
    t_stat = p_value = float('nan')

# Print results
print(f"Overall Average Temperature: {overall_avg_temp:.2f}")
print(f"July Average Temperature: {july_avg_temp:.2f}")
print(f"May Average Temperature: {may_avg_temp:.2f}")
print(f"T-statistic: {t_stat:.2f}")
print(f"P-value: {p_value:.6f}")

# Conclusion about statistical significance. A NaN p-value means the test
# was not computable; it must not be reported as "not significant".
if pd.isna(p_value):
    print("Conclusion: The t-test could not be computed (too few valid July or May observations, or no variance in the data).")
elif p_value < 0.05:
    print("Conclusion: The difference between July and May temperatures is statistically significant at the 0.05 level.")
else:
    print("Conclusion: The difference between July and May temperatures is not statistically significant at the 0.05 level.")