In [1]:
import pandas as pd
import numpy as np
from keras import models, layers
import matplotlib.pyplot as plt
from google.colab import drive
# Mount Google Drive at /content/drive; the last cell of this notebook saves
# the trained model to a path under /content/drive/MyDrive.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Step 1: Load the data
# The file has no header row and uses "?" for missing values. Passing
# na_values makes read_csv parse those cells as NaN, so numeric columns come
# back with a numeric dtype instead of object columns full of strings.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/communities/communities.data',
    header=None,
    na_values='?')

# Average of each numeric column (NaN cells are skipped by mean()).
# A numeric column with no usable value at all gets 0 as its average.
numeric_means = df.mean(numeric_only=True).fillna(0)

# One entry per column, in column order; non-numeric columns get 0.
avgs = numeric_means.reindex(df.columns, fill_value=0).tolist()

# Replace every missing numeric cell by the average of its column.
# Columns that are absent from numeric_means (text columns) are left untouched.
df = df.fillna(numeric_means)


In [None]:
# Step 2: Prepare the data
# Drop the non-numerical column. The file is read with header=None, so the
# column labels are the integers 0..n-1 and label 3 is the column at position 3.
# Dropping by label (not by position) keeps this cell safe to re-run: once
# label 3 is gone nothing else is removed.
df = df.drop(columns=[3], errors='ignore')

# Make sure every remaining column has a float dtype before doing arithmetic.
df = df.astype('float64')

# Normalize columns by subtracting the mean and dividing by the standard
# deviation. A constant column has std == 0; divide it by 1 instead so it
# becomes all zeros rather than NaN.
column_std = df.std().replace(0, 1.0)
df = (df - df.mean()) / column_std

In [None]:
def load_crime_data(df):
  ys = df.iloc[:,-1:].to_numpy()
  ys = np.asarray(ys).astype(np.float32)
  xs = df.iloc[:,0:127].to_numpy()
  xs = np.asarray(xs).astype(np.float32)
  
  return [xs, ys]

xs,ys= load_crime_data(df)[0],load_crime_data(df)[1]

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Step 3: Build the network
def build_model():
  model = models.Sequential()
  model.add(layers.Dense(16, activation = 'relu', input_shape=(127,)))
  model.add(layers.Dense(32, activation = 'relu'))
  model.add(layers.Dense(16, activation = 'relu'))
  model.add(layers.Dense(1))
  model.compile(optimizer = 'rmsprop', loss = 'mse', metrics = ['mae'])
  return model

In [None]:
#Step 4: Training and validation
k = 4
num_val_samples = len(xs)//k 
num_epochs = 200
all_scores = []
all_mae_histories = []

for i in range(k):
  print("processing fold #", i)

  val_data = xs[i * num_val_samples: (i+1) * num_val_samples]
  val_targets = ys[i * num_val_samples: (i+1) * num_val_samples]

  x_train = np.concatenate(
    [xs[:i * num_val_samples],
     xs[(i+1) * num_val_samples:]],
     axis = 0)
  
  y_train = np.concatenate(
      [ys[:i * num_val_samples],
       ys[(i+1) * num_val_samples:]],
        axis=0)

In [None]:
model = build_model()
history = model.fit(
    x_train,
    y_train,
    epochs = 200,
    batch_size = 5,
    verbose = 0)

In [None]:
# Plotting
mae_history = history.history['mae']
all_mae_histories.append(mae_history)
val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
all_scores.append(val_mae)

# For every epoch, average the MAE recorded at that epoch over all collected
# histories.
average_mae_history = []
for epoch in range(num_epochs):
  maes_at_epoch = [fold_history[epoch] for fold_history in all_mae_histories]
  average_mae_history.append(np.mean(maes_at_epoch))

# Epochs are shown 1-based on the x axis.
epoch_axis = range(1, len(average_mae_history) + 1)
plt.plot(epoch_axis, average_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()
plt.clf()
# Lowest recorded score, followed by the full list of scores.
print(np.min(all_scores), all_scores)

def smooth_curve(points,factor=.9):
  """Return an exponentially smoothed copy of ``points``.

  The first value is kept unchanged; every later value is
  ``previous_smoothed * factor + point * (1 - factor)``.

  Args:
    points: iterable of numbers.
    factor: weight given to the previous smoothed value.

  Returns:
    A list with one smoothed value per input point (empty for empty input).
  """
  remaining = iter(points)
  try:
    running = next(remaining)
  except StopIteration:
    # Nothing to smooth.
    return []

  smoothed_points = [running]
  for point in remaining:
    running = running * factor + point * (1-factor)
    smoothed_points.append(running)
  return smoothed_points

# Leave out the first 10 epochs and smooth the rest of the averaged curve.
smoothed_mae_history = smooth_curve(average_mae_history[10:])

# The x axis starts at epoch 11 to line up with the skipped prefix.
smoothed_epoch_axis = range(11, len(average_mae_history) + 1)
plt.plot(smoothed_epoch_axis, smoothed_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()


In [None]:
#Step 5: Retrain on whole data set
# Report the cross-validation estimate first. all_scores must only contain
# scores measured on held-out data, so nothing from this cell is appended to
# it (or to all_mae_histories); that also keeps the cell safe to re-run.
print(np.mean(all_scores))

model = build_model()
history = model.fit(
    xs,
    ys,
    epochs = num_epochs,
    batch_size = 5,
    verbose = 0)
mae_history = history.history['mae']

# The final model has been fitted on every sample, so there is no held-out
# data left: this is a training-set figure and is reported separately from
# the cross-validation score above.
train_mse, train_mae = model.evaluate(xs, ys, verbose=0)
print("MAE of the final model on its own training data:", train_mae)

In [None]:
# Write the current `model` to a .h5 file under the mounted Drive folder.
model_path = "/content/drive/MyDrive/CS 577/AS1/Q4.h5"
model.save(model_path)