In [1]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
appliances_energy_prediction = fetch_ucirepo(id=374) 
  
# data (as pandas dataframes) 
X = appliances_energy_prediction.data.features 
y = appliances_energy_prediction.data.targets 
  
# metadata 
print(appliances_energy_prediction.metadata) 
  
# variable information 
print(appliances_energy_prediction.variables) 


{'uci_id': 374, 'name': 'Appliances Energy Prediction', 'repository_url': 'https://archive.ics.uci.edu/dataset/374/appliances+energy+prediction', 'data_url': 'https://archive.ics.uci.edu/static/public/374/data.csv', 'abstract': 'Experimental data used to create regression models of appliances energy use in a low energy building.', 'area': 'Computer Science', 'tasks': ['Regression'], 'characteristics': ['Multivariate', 'Time-Series'], 'num_instances': 19735, 'num_features': 28, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['Appliances'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2017, 'last_updated': 'Fri Mar 29 2024', 'dataset_doi': '10.24432/C5VC8G', 'creators': ['Luis Candanedo'], 'intro_paper': {'title': 'Data driven prediction models of energy use of appliances in a low-energy house', 'authors': 'L. Candanedo, V. Feldheim, Dominique Deramaix', 'published_in': 'Energy and Buildings, Volume 140', 'year': 

In [2]:
import pandas as pd
import numpy as np


In [3]:
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.preprocessing import StandardScaler

In [4]:
df=X

In [5]:
df.head()

Unnamed: 0,date,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,2016-01-1117:00:00,30,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,45.566667,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,2016-01-1117:10:00,30,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,45.9925,...,17.066667,45.56,6.48,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195
2,2016-01-1117:20:00,30,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,45.89,...,17.0,45.5,6.37,733.7,92.0,6.333333,55.333333,5.1,28.642668,28.642668
3,2016-01-1117:30:00,40,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,45.723333,...,17.0,45.4,6.25,733.8,92.0,6.0,51.5,5.0,45.41039,45.41039
4,2016-01-1117:40:00,40,19.89,46.333333,19.2,44.53,19.79,45.0,18.89,45.53,...,17.0,45.4,6.13,733.9,92.0,5.666667,47.666667,4.9,10.084097,10.084097


In [7]:
df.drop(columns=['date'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['date'], inplace=True)


In [8]:
#Scale the datafirst
scaler=StandardScaler()
df=scaler.fit_transform(df)

In [9]:
#Apply PCA to df

num_components=5
pca=PCA(n_components=num_components)
df=pca.fit_transform(df)

In [11]:
df=pd.DataFrame(data=df,columns=[i for i in range(num_components)])

In [12]:
df.head()

Unnamed: 0,0,1,2,3,4
0,-2.913949,-4.689526,1.43157,1.194512,2.472082
1,-2.932551,-4.618418,0.905247,1.226547,2.496514
2,-2.972968,-4.533169,-0.070195,1.353061,2.526499
3,-3.064495,-4.511911,-1.679798,1.647074,3.504469
4,-3.043124,-4.487672,1.683986,0.91064,3.493437


In [13]:
#Apply LDA to df

LDA_df=X
LDA_target=y

In [14]:
LDA_df.head()

Unnamed: 0,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,T5,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,30,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,45.566667,17.166667,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,30,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,45.9925,17.166667,...,17.066667,45.56,6.48,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195
2,30,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,45.89,17.166667,...,17.0,45.5,6.37,733.7,92.0,6.333333,55.333333,5.1,28.642668,28.642668
3,40,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,45.723333,17.166667,...,17.0,45.4,6.25,733.8,92.0,6.0,51.5,5.0,45.41039,45.41039
4,40,19.89,46.333333,19.2,44.53,19.79,45.0,18.89,45.53,17.2,...,17.0,45.4,6.13,733.9,92.0,5.666667,47.666667,4.9,10.084097,10.084097


In [15]:
LDA_target.head()

Unnamed: 0,Appliances
0,60
1,60
2,50
3,50
4,60


In [19]:
lda=LDA()
LDA_vals=lda.fit_transform(LDA_df,LDA_target)

  y = column_or_1d(y, warn=True)


In [20]:
lda_df_final = pd.DataFrame(data=LDA_vals, columns=[i for i in range(LDA_vals.shape[1])])

In [21]:
lda_df_final.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,1.235342,-1.263493,0.740458,-0.12506,3.510818,-0.926338,1.223482,0.221112,-0.616689,0.039779,...,-1.560964,-0.655874,1.745316,1.158933,-0.993661,0.843365,-2.048495,-0.195406,2.404614,1.026942
1,0.911479,-1.040466,0.614535,-0.478,3.082403,-1.275659,0.894311,0.158205,-0.637519,-0.117088,...,-1.852841,-0.314746,1.382088,1.1914,-1.006182,0.951408,-2.350183,-0.219367,2.220292,0.784096
2,0.795009,-1.017329,0.536744,-0.439224,2.790533,-1.551516,0.870657,0.099186,-0.824254,-0.26637,...,-2.043057,-0.307602,1.11468,1.161471,-0.84295,1.152692,-2.638937,-0.362778,1.835202,0.627153
3,1.221283,-1.010312,0.302103,-0.760811,3.384259,-2.231086,1.172721,-0.03919,-0.818767,-0.709856,...,-2.541294,0.000118,0.723614,1.002697,-0.426021,1.771592,-3.277475,-0.607158,1.514878,0.578219
4,1.303452,-1.12755,0.378559,-0.732103,3.835314,-2.110323,1.275668,0.233333,-0.339428,-0.425616,...,-2.192088,-0.16303,1.291995,0.547965,-1.477619,1.560087,-2.434423,-0.158218,1.435119,0.782723
