In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import OneHotEncoder
from datetime import timedelta

# Example data loading (replace with your actual data)
data = pd.read_csv('final_complaints.csv')

# Convert categorical features to numerical using one-hot encoding
encoder = OneHotEncoder(sparse_output=False)  # Corrected parameter name
encoded_features = encoder.fit_transform(data[['area', 'type', 'department']])

# Convert the encoded features back to a DataFrame for easier concatenation
encoded_df = pd.DataFrame(encoded_features, columns=encoder.get_feature_names_out(['area', 'type', 'department']))

# Combine the encoded features with the original numerical features
X = pd.concat([data[['filing_date', 'predicted_priority']], encoded_df], axis=1)

# Convert `filing_date` into numerical features (e.g., day of the year)
X['filing_date'] = pd.to_datetime(data['filing_date'])
X['day_of_year'] = X['filing_date'].dt.dayofyear
X['month'] = X['filing_date'].dt.month
X['year'] = X['filing_date'].dt.year

# Target variable: resolved_days_new (resolution time)
y = data['resolved_days_new']

# Ensure column names are all strings
X.columns = X.columns.astype(str)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model's performance
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error (MAE): {mae}")

# Now, predict for new data
new_data = pd.DataFrame({
    'filing_date': ['2025-04-01'],
    'predicted_priority': [1],  # example priority: 1 for high
    'area': ['Hadapsar'],
    'type': ['Technical'],
    'department': ['IT']
})

# Convert `filing_date` to features
new_data['filing_date'] = pd.to_datetime(new_data['filing_date'])
new_data['day_of_year'] = new_data['filing_date'].dt.dayofyear
new_data['month'] = new_data['filing_date'].dt.month
new_data['year'] = new_data['filing_date'].dt.year

# One-hot encode categorical features
encoded_new_data = encoder.transform(new_data[['area', 'type', 'department']])

# Combine the new data into the same format as the training data
X_new = pd.concat([new_data[['filing_date', 'predicted_priority']], pd.DataFrame(encoded_new_data)], axis=1)

# Predict resolution time
predicted_resolution_time = model.predict(X_new)[0]
print(f"Predicted Resolution Time: {predicted_resolution_time} days")

# Calculate expected completion date
resolution_date = new_data['filing_date'].iloc[0] + timedelta(days=predicted_resolution_time)
print(f"Expected Completion Date: {resolution_date.strftime('%d %B %Y')}")


TypeError: float() argument must be a string or a real number, not 'Timestamp'

In [2]:
# Ensure column names are all strings
X.columns = X.columns.astype(str)
