https://stackoverflow.com/questions/21415661/logical-operators-for-boolean-indexing-in-pandas
https://www.listendata.com/2019/04/create-dummy-data-in-python.html

# 1.  Generating synthetic data

In [4]:
import pandas as pd
import numpy as np
import random

## 1.1 Rules for data generation
- We would like the age to be between 14 to 35
- Gender should be either 0 or 1, 0 being female and 1 male.
- Status would be calcauted using , two simple rules
    - A worker must be 18 years or above for male
    - A worker must be 20 years or above for female

In [5]:
np.random.seed(1)
data = pd.DataFrame({"age"  : np.random.choice(range(15,35), 500),
                     "gender"  : np.random.choice(range(1,2), 500)})
data["is_underage"] = (data["gender"] == 1) & (data["age"] <= 18) | (data["gender"] == 0) & (data["age"] <= 20) 
data.head()

Unnamed: 0,age,gender,is_underage
0,20,1,False
1,26,1,False
2,27,1,False
3,23,1,False
4,24,1,False


# 2. Pre-Processing

## 2.1. Train Test spilt

In [6]:
train=data.sample(frac=0.8,random_state=200)
test=data.drop(train.index)

In [7]:
train.describe()

Unnamed: 0,age,gender
count,400.0,400.0
mean,24.5775,1.0
std,5.702657,0.0
min,15.0,1.0
25%,20.0,1.0
50%,25.0,1.0
75%,29.0,1.0
max,34.0,1.0


In [8]:
test.describe()

Unnamed: 0,age,gender
count,100.0,100.0
mean,24.42,1.0
std,5.775567,0.0
min,15.0,1.0
25%,19.75,1.0
50%,24.0,1.0
75%,30.0,1.0
max,34.0,1.0


## 2.2 Training

### A simple nueral network

In [63]:
weights = np.array([0.5,0.48])
alpha = 0.1
input = train[['age','gender']].iloc[[2]]
goal_prediction = train[['is_underage']].iloc[[2]]

In [75]:
goal_prediction.shape

()

In [76]:
input.shape

(3,)

In [77]:
weights.shape

(3,)

In [79]:
for i in range(20):
    prediction = input.dot(weights)
    error = (goal_prediction - prediction) ** 2
    delta = prediction - goal_prediction
    weights = weights - (alpha * (input * delta))
    print("Error:" + str(error) + " Prediction:" + str(prediction))


Error:3.4028236692096106e-06 Prediction:-0.0018446744073710164
Error:2.177807148294282e-06 Prediction:-0.0014757395258968575
Error:1.3937965749083403e-06 Prediction:-0.001180591620717486
Error:8.920298079413378e-07 Prediction:-0.0009444732965739888
Error:5.708990770825904e-07 Prediction:-0.0007555786372592799
Error:3.653754093328579e-07 Prediction:-0.0006044629098074239
Error:2.3384026197294315e-07 Prediction:-0.0004835703278458503
Error:1.4965776766264926e-07 Prediction:-0.00038685626227663583
Error:9.578097130409553e-08 Prediction:-0.00030948500982130867
Error:6.129982163457715e-08 Prediction:-0.0002475880078569581
Error:3.923188584614697e-08 Prediction:-0.0001980704062856109
Error:2.5108406941548135e-08 Prediction:-0.00015845632502853313
Error:1.6069380442568288e-08 Prediction:-0.00012676506002273769
Error:1.0284403483261718e-08 Prediction:-0.00010141204801827897
Error:6.5820182292947054e-09 Prediction:-8.112963841466758e-05
Error:4.212491666737083e-09 Prediction:-6.490371073164525e

### Learning the whole dataset

In [80]:
for iteration in range(40):
    error_for_all_lights = 0
    for row_index in range(len(walk_vs_stop)):
        input = streetlights[row_index]
        goal_prediction = walk_vs_stop[row_index]
        
        prediction = input.dot(weights)
        
        error = (goal_prediction - prediction) ** 2
        error_for_all_lights += error
        
        delta = prediction - goal_prediction
        weights = weights - (alpha * (input * delta))	
        print("Prediction:" + str(prediction))
    print("Error:" + str(error_for_all_lights) + "\n")

Prediction:-2.126764793253244e-05
Prediction:-0.12000850705917299
Prediction:-0.48800765635325566
Prediction:0.7527854529288143
Prediction:0.20223686940222446
Prediction:0.29000382469691166
Error:2.2742137921602397

Prediction:0.23200305975752933
Prediction:0.3095888070763354
Prediction:-0.3178686921041116
Prediction:1.0519701740739809
Prediction:0.4690638800566833
Prediction:0.32913001348833654
Error:1.0244541687036883

Prediction:0.26330401079066923
Prediction:0.5160077016174461
Prediction:-0.249029400896412
Prediction:1.1457809407508008
Prediction:0.608552913233438
Prediction:0.293933899086928
Error:0.6264732309595671

Prediction:0.2351471192695424
Prediction:0.6339342287511034
Prediction:-0.21586137091595212
Prediction:1.1693191635489535
Prediction:0.6948696873826872
Prediction:0.24295960818405937
Error:0.4166973060663361

Prediction:0.19436768654724748
Prediction:0.7121630204330192
Prediction:-0.19564315043395356
Prediction:1.1692157290182603
Prediction:0.7554515855861587
Predicti