### Importing the Dependencies

In [26]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix


### Data Collection and Processing

In [2]:
# loading the data from csv file to pandas dataframe
electricity_dataset=pd.read_csv("electricity_overuse_data.csv")
print(electricity_dataset)

    Units_kWh  AC_Hours  Appliances_On  Usage_Hours  Overuse
0          10         7              4            9        1
1           7         3              8           17        0
2          14         5              5           17        1
3          11         5             10           12        1
4           8         1              4           17        0
5          10         7              6           18        1
6          13         3              4           13        1
7           6         4              8            8        0
8          10         0              6           19        1
9          14         3             10            6        1
10         14         1              8            9        1
11         11         5              3            7        1
12          8         4              5           13        0
13          7         3             10            9        0
14         11         0              3            7        1
15         11         0 

In [3]:
#Inspecting the first 5 rows of the dataframe
electricity_dataset.head()

Unnamed: 0,Units_kWh,AC_Hours,Appliances_On,Usage_Hours,Overuse
0,10,7,4,9,1
1,7,3,8,17,0
2,14,5,5,17,1
3,11,5,10,12,1
4,8,1,4,17,0


In [4]:
#Inspecting the last 5 rows of the dataframe
electricity_dataset.tail()

Unnamed: 0,Units_kWh,AC_Hours,Appliances_On,Usage_Hours,Overuse
15,11,0,11,19,1
16,6,2,10,11,0
17,9,2,11,11,0
18,8,6,6,15,0
19,5,1,3,9,0


In [5]:
# checking the number of rows and colums
electricity_dataset.shape

(20, 5)

In [6]:
# Getting dataset info
electricity_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   Units_kWh      20 non-null     int64
 1   AC_Hours       20 non-null     int64
 2   Appliances_On  20 non-null     int64
 3   Usage_Hours    20 non-null     int64
 4   Overuse        20 non-null     int64
dtypes: int64(5)
memory usage: 932.0 bytes


In [7]:
# checking the number of missing values
electricity_dataset.isnull().sum()

Units_kWh        0
AC_Hours         0
Appliances_On    0
Usage_Hours      0
Overuse          0
dtype: int64

In [8]:
# getting the statistical measures of the data
electricity_dataset.describe()

Unnamed: 0,Units_kWh,AC_Hours,Appliances_On,Usage_Hours,Overuse
count,20.0,20.0,20.0,20.0,20.0
mean,9.65,3.1,6.75,12.3,0.55
std,2.777257,2.245463,2.863105,4.341962,0.510418
min,5.0,0.0,3.0,6.0,0.0
25%,7.75,1.0,4.0,9.0,0.0
50%,10.0,3.0,6.0,11.5,1.0
75%,11.0,5.0,10.0,17.0,1.0
max,14.0,7.0,11.0,19.0,1.0


In [9]:
electricity_dataset["Overuse"].value_counts()

Overuse
1    11
0     9
Name: count, dtype: int64

1 --> Electricity Usage High

0 --> Electricity Usage Normal

### Splitting the data and Target

In [12]:
x=electricity_dataset.drop(["Overuse"],axis=1)
y=electricity_dataset["Overuse"]

In [13]:
print(x)

    Units_kWh  AC_Hours  Appliances_On  Usage_Hours
0          10         7              4            9
1           7         3              8           17
2          14         5              5           17
3          11         5             10           12
4           8         1              4           17
5          10         7              6           18
6          13         3              4           13
7           6         4              8            8
8          10         0              6           19
9          14         3             10            6
10         14         1              8            9
11         11         5              3            7
12          8         4              5           13
13          7         3             10            9
14         11         0              3            7
15         11         0             11           19
16          6         2             10           11
17          9         2             11           11
18          

In [14]:
print(y)

0     1
1     0
2     1
3     1
4     0
5     1
6     1
7     0
8     1
9     1
10    1
11    1
12    0
13    0
14    1
15    1
16    0
17    0
18    0
19    0
Name: Overuse, dtype: int64


### Data Standardization

In [15]:
scaler=StandardScaler()
x=scaler.fit_transform(x)

In [16]:
print(x)

[[ 0.1292975   1.78195548 -0.98544789 -0.77976933]
 [-0.97896679 -0.04569117  0.44793086  1.11058055]
 [ 1.60698323  0.86813216 -0.6271032   1.11058055]
 [ 0.49871893  0.86813216  1.16462023 -0.07088812]
 [-0.60954536 -0.95951449 -0.98544789  1.11058055]
 [ 0.1292975   1.78195548 -0.26875852  1.34687429]
 [ 1.23756179 -0.04569117 -0.98544789  0.16540561]
 [-1.34838822  0.4112205   0.44793086 -1.01606306]
 [ 0.1292975  -1.41642615 -0.26875852  1.58316802]
 [ 1.60698323 -0.04569117  1.16462023 -1.48865053]
 [ 1.60698323 -0.95951449  0.44793086 -0.77976933]
 [ 0.49871893  0.86813216 -1.34379258 -1.2523568 ]
 [-0.60954536  0.4112205  -0.6271032   0.16540561]
 [-0.97896679 -0.04569117  1.16462023 -0.77976933]
 [ 0.49871893 -1.41642615 -1.34379258 -1.2523568 ]
 [ 0.49871893 -1.41642615  1.52296492  1.58316802]
 [-1.34838822 -0.50260283  1.16462023 -0.30718186]
 [-0.24012393 -0.50260283  1.52296492 -0.30718186]
 [-0.60954536  1.32504382 -0.26875852  0.63799308]
 [-1.71780965 -0.95951449 -1.34

### Splitting Training and Test data

In [22]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.4,random_state=2)

In [18]:
print(x.shape,x_train.shape,x_test.shape)

(20, 4) (16, 4) (4, 4)


### Model Training

In [23]:
logistic_model=LogisticRegression()
logistic_model.fit(x_train,y_train)

### Model Evaluation

In [24]:
x_test_prediction=logistic_model.predict(x_test)

### Performance Metrics

In [25]:
test_data_accuracy=accuracy_score(y_test,x_test_prediction)
print(test_data_accuracy)

0.75


In [27]:
print("Confusion Matrix:\n", confusion_matrix(y_test, x_test_prediction))

Confusion Matrix:
 [[2 1]
 [1 4]]


### Making Predictions on New Input Data

In [29]:
new_input=pd.DataFrame({
    "Units_kWh": [11],
    "AC_Hours": [5],
    "Appliances_On": [8],
    "Usage_Hours": [13]
})
new_input_scaled=scaler.transform(new_input)
prediction=logistic_model.predict(new_input_scaled)
if prediction[0]==1:
    print("Electricity Overuse Detected")
else:
    print("Electricity Usage is Normal")


Electricity Overuse Detected
