# Wrangle

# Analyze and Visualize Time Differences

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
from scipy.stats import norm, kstest


def load_data(file_name):
    """Read a CSV into a DataFrame ordered by its 'date' column.

    The first column of the file is parsed as dates while reading.

    Args:
        file_name: Path or file-like object handed to ``pandas.read_csv``.

    Returns:
        The loaded DataFrame sorted by 'date' in ascending order. Row labels
        are not reset, so they still reflect the order in the file.
    """
    frame = pd.read_csv(file_name, parse_dates=[0])
    ordered = frame.sort_values(by='date')
    return ordered


def get_time_differences(df):
    """Return the gap, in hours, between each row's 'date' and the previous row's.

    Args:
        df: DataFrame with a datetime 'date' column.

    Returns:
        A float Series aligned with ``df``. The first entry is NaN because
        the first row has no predecessor to compare against.
    """
    seconds_per_hour = 3600
    gaps = df['date'].diff()
    elapsed_seconds = gaps.dt.total_seconds()
    return elapsed_seconds / seconds_per_hour


def main(file_name="data/data.csv"):
    """Plot and summarize the time gaps between consecutive records.

    Loads the CSV with ``load_data``, derives the hour gaps with
    ``get_time_differences``, shows a histogram with one-hour bins, prints
    descriptive statistics, and prints a Kolmogorov-Smirnov comparison
    against a normal distribution fitted to the sample.

    Args:
        file_name: CSV to analyze. Defaults to "data/data.csv", the path
            this function previously had hard-coded.

    Raises:
        ValueError: If the data yields no time differences, i.e. it has
            fewer than two dated rows.
    """
    data = load_data(file_name)
    data['time_differences'] = get_time_differences(data)
    # The first row has no predecessor, so its difference is NaN; drop it.
    time_differences = data['time_differences'].dropna()

    # Nothing below (bin edges, mode, KS test) is defined for an empty
    # sample, so fail early with an explanation.
    if time_differences.empty:
        raise ValueError(
            "Need at least two dated rows to compute time differences."
        )

    # One-hour-wide bins from 0 up to the largest observed gap.
    plt.hist(time_differences, bins=np.arange(0, time_differences.max() + 1, 1))
    plt.xlabel('Time difference (hours)')
    plt.ylabel('Frequency')
    plt.title('Distribution of time differences between rocks')
    plt.show()

    mean = np.mean(time_differences)
    median = np.median(time_differences)
    # Mode of the gaps truncated to whole hours. bincount needs non-negative
    # integers; the gaps are non-negative because load_data sorts by date.
    whole_hours = time_differences.astype(int).to_numpy()
    mode = np.argmax(np.bincount(whole_hours))
    std_dev = np.std(time_differences)  # population std (NumPy default ddof=0)
    skewness = scipy.stats.skew(time_differences)
    kurtosis = scipy.stats.kurtosis(time_differences)

    print(f"Mean: {mean}, Median: {median}, Mode: {mode}, Standard Deviation: {std_dev}, Skewness: {skewness}, Kurtosis: {kurtosis}")

    # Compare to known distributions (e.g., normal distribution).
    # NOTE: loc and scale are estimated from the same sample being tested,
    # so the reported p-value is optimistic; read it as a rough indication.
    normal_dist = norm(loc=mean, scale=std_dev)
    ks_result = kstest(time_differences, normal_dist.cdf)

    print(f"Kolmogorov-Smirnov test result: {ks_result}")
