In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the raw grievance export into `df`.
# NOTE(review): this is an absolute path (looks Colab-style); the file must
# exist at exactly this location for the notebook to run.
df = pd.read_csv('/content/sample_data/2025-grievances.csv')

In [None]:
# Quick look at the first rows, followed by the available column names.
print(df.head(), df.columns, sep='\n')

In [None]:
# Parse the grievance timestamps, keep only the street-light sub-category,
# and count how many complaints were filed on each calendar day.
df['Grievance Date'] = pd.to_datetime(df['Grievance Date'])
mask = df['Sub Category'].eq('Street Light Not Working')
df_sub = df.loc[mask]
daily = (
    df_sub
    .groupby(df_sub['Grievance Date'].dt.date)
    .size()
    .reset_index(name='count')
)

In [None]:
# Turn the grouped day key back into a datetime column called `date` and keep
# only the two columns the rest of the notebook uses.
daily = daily.assign(date=pd.to_datetime(daily['Grievance Date']))[['date', 'count']]


In [None]:
# Every calendar day from the first to the last observed complaint date.
full_range = pd.date_range(
    start=daily['date'].min(),
    end=daily['date'].max(),
    freq='D',
)

In [None]:
# Re-index onto the continuous calendar so that days without any complaint
# appear explicitly with a count of 0, then restore `date` as a column.
daily = (
    daily
    .set_index('date')
    .reindex(full_range, fill_value=0)
    .rename_axis('date')
    .reset_index()
)

print(daily.head(10))

Add Seasonality Feature

In [None]:
# Calendar month number of each row's `date`; this column is later passed to
# get_season and is also one of the model features.
daily['month'] = daily['date'].dt.month

In [None]:
def get_season(month):
    """Return the season label for a month number.

    6 maps to 'Monsoon'; 3, 4, 5 map to 'Summer'; 11, 12, 1, 2 map to
    'Winter'; every other value maps to 'Post-monsoon'.

    NOTE(review): a second ``get_season`` defined further down the notebook
    rebinds this name, so this label-returning version is shadowed once that
    later cell has run.
    """
    labelled_months = (
        ('Monsoon', (6,)),
        ('Summer', (3, 4, 5)),
        ('Winter', (11, 12, 1, 2)),
    )
    for label, months in labelled_months:
        if month in months:
            return label
    return 'Post-monsoon'

Festival data

In [None]:
# Dates flagged as festivals, written as ISO yyyy-mm-dd strings.
# NOTE(review): the entries are marked "e.g." and read like placeholders —
# confirm each date against the official calendar before relying on the flag.
festival_dates = [
    '2025-03-29',  # e.g., Ugadi
    '2025-04-12',  # e.g., Ram Navami
    '2025-05-16',  # Add more as needed
]
# 1 when the row's date, rendered as yyyy-mm-dd, is in the list above; else 0.
daily['is_festival'] = (
    daily['date']
    .dt.strftime('%Y-%m-%d')
    .isin(festival_dates)
    .astype(int)
)

print(daily.head(10))

In [None]:
def get_season(month):
    """Encode a month number as an integer season code.

    Returns 1 for month 6, 2 for months 3-5, 3 for months 11, 12, 1 and 2,
    and 4 for every other value.
    """
    if month == 6:
        return 1
    if month in (3, 4, 5):
        return 2
    if month in (11, 12, 1, 2):
        return 3
    return 4
# Integer season code for every row, derived element-wise from `month`.
daily['season'] = daily['month'].map(get_season)

LSTM DATA PROCESSING

Select features for the model

In [None]:
# Columns fed to the model. `count` has to stay first: later cells read the
# prediction target from column 0 of this matrix.
features = ['count', 'month', 'season', 'is_festival']
X_all = daily[features].to_numpy(dtype=float)

Normalization

In [None]:
# Min-max scale every feature using statistics from the training portion only.
# Fitting on the full series would leak the min/max of the evaluation period
# into training. The leading 80% of rows matches the 0.8 chronological
# train/test split applied to the windows later in the notebook, and every
# test target lies after these rows.
scaler = MinMaxScaler()
n_fit_rows = max(1, int(0.8 * len(X_all)))
scaler.fit(X_all[:n_fit_rows])
# Rows after the fitted range may land slightly outside [0, 1]; that is the
# expected consequence of not peeking at them while fitting.
X_all_scaled = scaler.transform(X_all)

In [None]:
# Look-back window length for the LSTM: each sample is built from this many
# consecutive rows of the daily series (30 rows, i.e. 30 days).
sequence_length = 30

In [None]:
def create_sequences(data, seq_len, target_col=0):
    """Build sliding-window samples for a sequence model.

    Each sample is ``seq_len`` consecutive rows of ``data``; its target is the
    value in column ``target_col`` of the row immediately after that window.

    Parameters
    ----------
    data : array-like, shape (n_rows, n_features)
        Time-ordered feature matrix.
    seq_len : int
        Number of rows per window; must be at least 1.
    target_col : int, default 0
        Column of ``data`` used as the prediction target.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, seq_len, n_features)
    y : numpy.ndarray, shape (n_samples,)
        ``n_samples`` is ``max(n_rows - seq_len, 0)``. When there are too few
        rows for a single window, correctly shaped empty arrays are returned
        so that callers can still read ``X.shape[1]`` and ``X.shape[2]``.

    Raises
    ------
    ValueError
        If ``data`` has fewer than two dimensions or ``seq_len`` is below 1.
    """
    data = np.asarray(data)
    if data.ndim < 2:
        raise ValueError("data must be at least 2-D: (n_rows, n_features)")
    if seq_len < 1:
        raise ValueError("seq_len must be a positive integer")

    n_samples = max(len(data) - seq_len, 0)
    if n_samples == 0:
        empty_X = np.empty((0, seq_len) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,) + data.shape[2:], dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + seq_len] for start in range(n_samples)])
    # The target for window `start` is row `start + seq_len`, so the targets
    # are simply every row from `seq_len` onwards; copy to avoid returning a
    # view into the caller's array.
    y = data[seq_len:, target_col].copy()
    return X, y

# Window the scaled feature matrix and report the resulting array shapes.
X, y = create_sequences(data=X_all_scaled, seq_len=sequence_length)
print(X.shape, y.shape)

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

Split the data and Build the LSTM Model

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

Train/test split

In [None]:
# Chronological 80/20 split: the earliest windows are used for training and
# the latest ones for evaluation (no shuffling).
split = int(len(X) * 0.8)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

In [None]:
# One LSTM layer followed by a single-unit regression head. The input shape
# (window length, feature count) is read from the training windows.
model = Sequential()
model.add(
    LSTM(
        64,
        activation='relu',
        input_shape=(X_train.shape[1], X_train.shape[2]),
    )
)
model.add(Dense(1))

In [None]:
# Train with Adam on mean-squared error.
# NOTE(review): the held-out test windows double as validation data here, so
# the reported validation loss is not an independent estimate.
model.compile(loss='mse', optimizer='adam')
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=8,
    epochs=30,
)

Evaluate and Visualize Predictions

In [None]:
def _as_feature_rows(count_column):
    """Put a 1-D column of scaled counts in column 0 of a zero matrix as wide as `features`."""
    rows = np.zeros((len(count_column), len(features)))
    rows[:, 0] = count_column
    return rows


# The scaler was fitted on all feature columns, so the scaled counts are padded
# to full width before inverting; only column 0 (the count) is read back.
preds = model.predict(X_test)
X_full = _as_feature_rows(preds[:, 0])
y_pred_inv = scaler.inverse_transform(X_full)[:, 0]

y_true_full = _as_feature_rows(y_test)
y_test_inv = scaler.inverse_transform(y_true_full)[:, 0]


In [None]:
# Actual versus predicted daily complaint counts over the test windows.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(y_test_inv, label='Actual')
ax.plot(y_pred_inv, label='Predicted')
ax.set_title('LSTM Forecast (with Seasonality)')
ax.set_xlabel('Test Sample Index')
ax.set_ylabel('Daily Complaints')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Fresh, unfitted scaler. This rebinds `scaler`, so the instance fitted on the
# feature matrix earlier in the notebook is no longer reachable by that name.
scaler = MinMaxScaler()

In [None]:
# Show the first and last rows of the raw frame (displayed as a 2-tuple).
df.head(),df.tail()

In [None]:
# Fit the scaler on the training data only, then apply the same scaling to
# both splits.
# NOTE(review): `train` and `test` are not assigned in any cell visible in
# this notebook — confirm where they come from, otherwise this cell raises
# NameError on a fresh kernel.
scaled_train = scaler.fit_transform(train)
scaled_test = scaler.transform(test)

In [None]:
# Peek at the first ten rows of the scaled training data.
scaled_train[:10]

In [None]:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator


In [None]:
# Small demonstration generator: windows of `n_input` steps over scaled_train,
# which is passed as both the input series and the target series, one sample
# per batch. `n_features` is used further down as the model's input width.
n_input = 3
n_features = 1
generator = TimeseriesGenerator(scaled_train,scaled_train,length=n_input,batch_size=1)

In [None]:

# Inspect the first batch produced by the generator.
# NOTE: this rebinds X and y, replacing the sliding-window arrays built in the
# earlier LSTM section.
X,y = generator[0]
print(f'Given the Array: \n{X.flatten()}')
print(f'Predict this y: \n {y}')


In [None]:
# Shape of the input part of the first generator batch.
X.shape

In [None]:

# Rebuild the generator with a 12-step look-back window; this is the one used
# for training below.
# NOTE(review): the original comment called these "12 months" — confirm the
# sampling interval of `train`, which is not visible in this notebook.
n_input = 12
generator = TimeseriesGenerator(scaled_train, scaled_train, length=n_input, batch_size=1)

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

In [None]:

# Define and compile the network: LSTM(100) over (n_input, n_features) windows
# feeding a single-output dense layer. This rebinds `model`, replacing the
# network trained in the earlier section.
model = Sequential([
    LSTM(100, activation='relu', input_shape=(n_input, n_features)),
    Dense(1),
])
model.compile(loss='mse', optimizer='adam')

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

In [None]:
# Train for 50 epochs on the batches yielded by the 12-step generator.
model.fit(generator,epochs=50)

In [None]:
# Training loss recorded for each epoch of the most recent fit, plotted
# against the epoch index.
loss_per_epoch = model.history.history['loss']
plt.plot(range(len(loss_per_epoch)),loss_per_epoch)

In [None]:
# Last 12 rows of the scaled training data. The 12 is hard-coded here and
# matches the current value of n_input.
last_train_batch = scaled_train[-12:]

In [None]:
# Add a leading batch axis of size 1 so the array has shape
# (1, n_input, n_features), i.e. one sample of the model's input shape.
last_train_batch = last_train_batch.reshape((1, n_input, n_features))

In [None]:


# One-step prediction from the final training window; the model was trained on
# scaled_train, so the value is in scaled units.
model.predict(last_train_batch)

In [None]:
# First row of the scaled test data — presumably shown for comparison with the
# prediction in the previous cell.
scaled_test[0]

In [None]:
# Recursive multi-step forecast: one prediction per row of `test`, each fed
# back in as the newest step of the input window.
test_predictions = []

# Seed the rolling window with the final n_input rows of the scaled training data.
first_eval_batch = scaled_train[-n_input:]
current_batch = first_eval_batch.reshape((1, n_input, n_features))

for i in range(len(test)):
    # One-step-ahead forecast for the current window (row 0 of the batch output).
    current_pred = model.predict(current_batch)[0]
    test_predictions.append(current_pred)

    # Slide the window: drop the oldest step and append the new forecast.
    current_batch = np.concatenate(
        (current_batch[:, 1:, :], current_pred[np.newaxis, np.newaxis, :]),
        axis=1,
    )

In [None]:
# Display the collected forecasts (still in scaled units at this point).
test_predictions

In [None]:
# First rows of the test frame before the predictions column is attached.
test.head()

In [None]:

# Map the scaled forecasts back to original units with the scaler fitted on `train`.
true_predictions = scaler.inverse_transform(test_predictions)


# Attach the forecasts to the test frame as a new column.
# NOTE(review): `test` is not created in any visible cell; if it is a slice of
# another frame this assignment may trigger SettingWithCopyWarning — confirm.
test['Predictions'] = true_predictions


# Plot every column of `test`, including the new 'Predictions' column.
test.plot(figsize=(14,5))

In [None]:
from sklearn.metrics import mean_squared_error
from math import sqrt
# Root-mean-squared error between the 'Production' and 'Predictions' columns of `test`.
# NOTE(review): no 'Production' column is created anywhere in the visible
# notebook (the grievance data uses `count`) — confirm the intended target column.
rmse=sqrt(mean_squared_error(test['Production'],test['Predictions']))
print(rmse)