In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Name of the column that holds the unit power efficiency
unit_power_efficiency = 'TH/J'

# Maximum-efficiency table for all dates. Its 'Date' column is parsed into
# datetimes (assumes the stored format is one pd.to_datetime understands).
max_efficiency_table = pd.read_csv('../hardwarelist/Bitcoin max updated.csv')
max_efficiency_table = max_efficiency_table.assign(
    Date=pd.to_datetime(max_efficiency_table['Date'])
)

# Main data set: parse 'date' once, then reuse the parsed series both for the
# datetime column itself and for the derived calendar 'year' column.
data = pd.read_csv('datasets/extracted/final3.csv')
parsed_dates = pd.to_datetime(data['date'])
data = data.assign(date=parsed_dates, year=parsed_dates.dt.year)

# Create a figure and axis
fig, ax = plt.subplots(figsize=(12, 8))

# Group the rows by calendar year. groupby sorts its keys, so the yearly
# series comes out in chronological order even when the CSV rows are not
# ordered by date, and it skips rows whose year is missing (NaT dates)
# instead of producing an empty per-year slice.
rows_by_year = data.groupby('year')

# Mean 'TH/J' value of each year
yearly_mean = rows_by_year[unit_power_efficiency].mean()

# Last date observed in each year; the yearly point is anchored there
yearly_last_date = rows_by_year['date'].max()

# Years present in the data. Cast to int so they render as 2015 rather than
# 2015.0 when missing dates forced the 'year' column to a float dtype.
years = yearly_mean.index.to_numpy().astype(int)

# Yearly 'TH/J' values, aligned with `years`
th_j_values = yearly_mean.tolist()

# One (end-of-year date, yearly mean) point per year, aligned with `years`.
# NOTE: despite the name, no linear fit is involved -- the plotted value is
# the plain yearly mean. The name is kept because later code reads it.
regression_lines = list(zip(yearly_last_date, th_j_values))

# Create a DataFrame to store the yearly 'TH/J' values
results = pd.DataFrame({'Year': years, unit_power_efficiency: th_j_values})

# Print the yearly 'TH/J' values
print(results)

# Scatter plot for all data points
plt.scatter(data['date'], data[unit_power_efficiency], c='blue', s=10, label='Data Points', alpha=0.5)

# Split the per-year (date, value) points into x and y sequences.
# zip(*[]) cannot be unpacked into two names, so fall back to empty
# sequences when there is no yearly data at all.
if regression_lines:
    regression_dates, regression_th_j_values = zip(*regression_lines)
else:
    regression_dates, regression_th_j_values = (), ()

# Plot the yearly line (one point per year)
plt.plot(regression_dates, regression_th_j_values, marker='o', linestyle='-', color='black', label=f'Yearly {unit_power_efficiency} Regression')

# Plot the Hardware (TH/J) line
plt.plot(max_efficiency_table['Date'], max_efficiency_table['Hardware (TH/J)'], color='red', label='Hardware (TH/J)', linewidth=2)

# Plot the Hardware with Archaicity (TH/J) line
plt.plot(max_efficiency_table['Date'], max_efficiency_table['Hardware with Archaicity (TH/J)'], color='green', label='Hardware with Archaicity (TH/J)', linewidth=2)

plt.yscale('log')  # Set the y-axis to a logarithmic scale base 10

# Add year labels with angled text. The x-axis holds datetimes, so each label
# is anchored at the date of its yearly point; passing the bare integer year
# as x would be interpreted as a raw date number and put the label far away
# from the point it describes.
for label_date, year, th_j_value in zip(regression_dates, years, regression_th_j_values):
    plt.text(label_date, th_j_value, str(year), rotation=45, ha='right', va='center', fontsize=10)

plt.ylabel('Power Efficiency (' + unit_power_efficiency + ')')
plt.title(f'Yearly-Updated {unit_power_efficiency} with Data Points and Max Efficiency')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()