# In [None]:
import requests
import pandas as pd
import numpy as np
# URL of the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00357/occupancy_data.zip'

# Download the dataset. The timeout keeps a stalled connection from hanging
# the script, and raise_for_status() stops on an HTTP error instead of
# passing an error page to ZipFile below.
response = requests.get(url, timeout=60)
response.raise_for_status()
zip_file = response.content # Raw bytes of the downloaded zip archive

from zipfile import ZipFile
import io

# Unpack the archive from memory into the 'occupancy_data' directory
with ZipFile(io.BytesIO(zip_file), 'r') as zip_ref:
    zip_ref.extractall('occupancy_data')

# Calculate the percentage of samples that a linear classifier gets right
def PercentCorrect(inputs, targets, weights):
    """Return the percentage of samples classified correctly by ``weights``.

    A sample counts as correct when ``target * dot(input, weights)`` is
    strictly positive; a product of exactly zero counts as incorrect.

    Args:
        inputs: 2-D array-like with one sample per row.
        targets: Array-like of signed labels, one per sample (a flat
            sequence or a single column). Only the first ``len(targets)``
            rows of ``inputs`` are scored.
        weights: Array-like with one weight per input column (flat or a
            single column).

    Returns:
        The percentage of correctly classified samples, as a float.

    Raises:
        ValueError: If ``targets`` is empty.
        IndexError: If ``inputs`` has fewer rows than ``targets``.
    """
    inputs = np.asarray(inputs)
    targets = np.asarray(targets)
    weights = np.asarray(weights)

    n_samples = len(targets)
    if n_samples == 0:
        raise ValueError("PercentCorrect needs at least one target")
    if len(inputs) < n_samples:
        raise IndexError("inputs has fewer rows than targets")

    # Score every sample in one matrix-vector product. Both operands are
    # flattened to length n_samples so that column-vector targets or weights
    # cannot broadcast into an n_samples x n_samples grid.
    scores = np.dot(inputs[:n_samples], weights).reshape(n_samples)
    margins = targets.reshape(n_samples) * scores

    n_correct = int(np.count_nonzero(margins > 0))
    return 100 * n_correct / n_samples

#Training Data
dataTrainingPath = 'occupancy_data/datatraining.txt'
dataTraining = pd.read_csv(dataTrainingPath)

# Pull the five feature columns out in one step instead of iterating over
# rows; the column order here fixes the order of the weight vector.
X_train = dataTraining[
    ['Temperature', 'Humidity', 'Light', 'CO2', 'HumidityRatio']
].to_numpy(dtype=float) # Contain Features
Y_real = dataTraining['Occupancy'].to_numpy() # Contain real occupancies

# Signed labels for the perceptron: Occupancy == 1 maps to -1, every other
# value maps to +1 (the same convention is used for the test data).
Y_train = np.where(Y_real == 1, -1.0, 1.0)

# Plain-list copies, kept so any code that still refers to these names works.
realX = X_train.tolist()
realY = Y_real.tolist()

#Test Data
dataTestPath = 'occupancy_data/datatest.txt'
dataTest = pd.read_csv(dataTestPath)

# Pull the five feature columns out in one step instead of iterating over
# rows; the column order must match the one used for the training features.
X_test = dataTest[
    ['Temperature', 'Humidity', 'Light', 'CO2', 'HumidityRatio']
].to_numpy(dtype=float) # Contain Features
Y_real_test = dataTest['Occupancy'].to_numpy() # Contain real occupancies

# Signed labels for the perceptron: Occupancy == 1 maps to -1, every other
# value maps to +1 (the same convention is used for the training data).
Y_test = np.where(Y_real_test == 1, -1.0, 1.0)

# Plain-list copies, kept so any code that still refers to these names works.
testX = X_test.tolist()
testY = Y_real_test.tolist()

# One weight per feature column, drawn from a standard normal distribution
w = np.random.randn(X_train.shape[1])

MaxIter=1000 # Number of randomly drawn samples to visit
alpha = 0.002 # Learning rate applied to each weight update

#Training the model
for _ in range(MaxIter):
    # Select a data item at random
    r = np.random.randint(len(X_train))
    x = X_train[r,:]

    # If it is misclassified, update weights. A margin of exactly zero is
    # treated as misclassified too, matching PercentCorrect, which only
    # counts strictly positive margins as correct.
    if (Y_train[r] * np.dot(x, w) <= 0):
        w += alpha * Y_train[r] * x

# Report the share of correctly classified samples, first on the data the
# weights were fitted to and then on the held-out test data.
trainingScore = PercentCorrect(X_train, Y_train, w)
print("Model correctness on training data:", trainingScore)

testingScore = PercentCorrect(X_test, Y_test, w)
print("Model correctness on testing data:", testingScore)