In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
## Load Data

# Read the asset table from CSV -- replace the path with your actual file.
# Required columns: asset_id, age, current_usage, usage_last_year
df = pd.read_csv(filepath_or_buffer="asset.csv")

# Preview the first five rows as a quick sanity check of the load.
print(df.head(n=5))


In [None]:
## Use K-Means clustering to split the assets into two task groups

# Cluster on current usage, last year's usage and asset age.
features = df.loc[:, ['current_usage', 'usage_last_year', 'age']]

# Standardise the features so they are on a common scale before clustering.
scaler = StandardScaler()
scaler.fit(features)
features_scaled = scaler.transform(features)

# Two clusters with a fixed seed; the fitted labels become the 'cluster' column.
kmeans = KMeans(n_clusters=2, random_state=42)
kmeans.fit(features_scaled)
df['cluster'] = kmeans.labels_

# Visual check of how well the two clusters separate on the usage columns.
sns.scatterplot(
    data=df,
    x='current_usage',
    y='usage_last_year',
    hue='cluster',
)
plt.title("Clustered Task")
plt.show()


In [None]:
# Label the larger cluster 'Task A' (assumes the majority of assets are Task A).
taskA_cluster = df['cluster'].value_counts().idxmax()

# Vectorised labelling: rows in the majority cluster are 'Task A', the rest 'Task B'.
df['type'] = np.where(df['cluster'] == taskA_cluster, 'Task A', 'Task B')

# Take explicit copies so that later cells can add columns to each subset
# without triggering pandas' SettingWithCopyWarning.
df_taskA = df[df['type'] == 'Task A'].copy()
df_taskB = df[df['type'] == 'Task B'].copy()


In [None]:
## Use Linear Regression to predict next year's usage

# Assume a stable usage pattern.
# NOTE: feature and target are both `usage_last_year`, so the fitted model is
# effectively the identity mapping; swap in a later-period usage column as the
# target once one is available.
model_taskA = LinearRegression()
X_taskA = df_taskA[['usage_last_year']]

# The target must come from the same Task A rows as X_taskA so that the
# samples line up one-to-one with the features.
y_taskA = df_taskA['usage_last_year']
model_taskA.fit(X_taskA, y_taskA)

# assign() returns a new frame, which avoids SettingWithCopyWarning when
# df_taskA is a filtered slice of df.
df_taskA = df_taskA.assign(
    predicted_usage_next_year=model_taskA.predict(X_taskA)
)


In [None]:
# Simplified: predict Task B usage as the median of last year's usage
# (median rather than mean, for robustness to outliers).
median_taskB = df_taskB['usage_last_year'].median()

# assign() returns a new frame, which avoids SettingWithCopyWarning when
# df_taskB is a filtered slice of df.
df_taskB = df_taskB.assign(predicted_usage_next_year=median_taskB)

# Recombine both task groups into a single frame.
df = pd.concat([df_taskA, df_taskB])


In [None]:
## Add projected usage: current usage plus the predicted usage for next year
df['projected_usage'] = df['current_usage'] + df['predicted_usage_next_year']

## Flag assets whose projected usage exceeds 750K
# Compare against the 'projected_usage' column created above.
df['will_cross_750k'] = df['projected_usage'] > 750000


In [None]:
## For this scenario assume:
##   additional cost per year for an asset above 750k usage = 15,000
##   cost saving from deferring a purchase = 130,000 per unit

# Define constants
cost_per_high_usage = 15000
purchase_cost = 130000

# Summary: keep only the assets flagged as crossing the 750k threshold.
at_risk = df[df['will_cross_750k']]

# Both totals scale linearly with the number of flagged assets.
cost_of_keeping = len(at_risk) * cost_per_high_usage
cost_of_buying = len(at_risk) * purchase_cost

print(f"Assets exceeding 750k next year: {len(at_risk)}")
print(f"Estimated extra maintenance cost: ${cost_of_keeping:,.0f}")
print(f"Cost avoided by not purchasing: ${cost_of_buying:,.0f}")
