In [1]:
import pandas as pd 
import numpy as np 
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from pandas_profiling import ProfileReport
import pickle 
import seaborn as sns
import os,glob
from pathlib import Path
import zipfile
import requests


# Let's create a data folder to store the data


In [12]:

def createFolder(folderame,path):
    """Create the folder ``folderame`` inside ``path`` if needed and return its path.

    Parameters
    ----------
    folderame : str
        Name of the folder to create.
    path : str
        Directory under which the folder is created.

    Returns
    -------
    str
        ``path`` joined with ``folderame`` using the platform's separator.
    """
    # os.path.join picks the correct separator for the running platform; a
    # hard-coded backslash only yields a nested folder on Windows.
    newFolder = os.path.join(path, folderame)
    # exist_ok makes re-running the cell safe without a separate exists() check.
    os.makedirs(newFolder, exist_ok=True)
    return newFolder
# Use the notebook's current working directory as the project root.
projectDir=os.getcwd()
# Create (if missing) the "Data" folder under the project root and keep its path.
datadir=createFolder("Data",projectDir)

# Let's download our dataset to the data folder using the requests library
Dataset link: https://archive.ics.uci.edu/ml/machine-learning-databases/00366/AReM.zip

In [22]:
# Download the AReM archive and store it as AReM.zip inside the Data folder.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00366/AReM.zip'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
r = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as AReM.zip.
r.raise_for_status()
# os.path.join keeps the path valid on every platform (no hard-coded backslash).
with open(os.path.join(datadir, "AReM.zip"), "wb") as code:
    code.write(r.content)

# Let's create sub-folders to keep the raw, modified, merged and final data

In [36]:
# One sub-folder of the Data folder per pipeline stage.
rawdata=createFolder("Raw Data",datadir)
modifiedData=createFolder("Modified Data",datadir)
mergedata=createFolder("Merge Data",datadir)
finalData=createFolder("Final Data",datadir)
# Path of the final combined CSV; os.path.join avoids a hard-coded backslash
# so the path is also valid outside Windows.
human_activitydata=os.path.join(finalData, "human_activity.csv")

# Let's get the raw data using the getRawData() function. It unzips the downloaded file and stores the raw data inside the Raw Data folder we created above.

In [24]:
def getRawData():
    """Extract every ``.zip`` archive found in ``datadir`` into ``rawdata``.

    Each entry of ``datadir`` is checked; archives are extracted and a message
    is printed for every entry, whether it was extracted or skipped. Errors
    raised while extracting are reported with a printed message instead of
    being re-raised (best-effort behaviour).
    """
    zipFileName=os.listdir(datadir)
    try:
        for file in zipFileName:
            # Match on the extension only; a substring test would also accept
            # names such as "backup.zip.txt".
            if file.lower().endswith(".zip"):
                archive_path = os.path.join(datadir, file)
                with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                    zip_ref.extractall(rawdata)
                    print(f"Successfully file extracted and stored inside {rawdata} path ")

            else:
                print(f"{file}: is not  Zip file ")
    except Exception as e:
        print("Something gone wrong please check",e)

# Unzip every archive found in the Data folder into the Raw Data folder.
getRawData()

Successfully file extracted and stored inside C:\Users\Soumya Ranjan\I N E U R O N P R O J E C TS\ML class project\Human Activity Prediction\Data\Raw Data path 
Final Data: is not  Zip file 
Merge Data: is not  Zip file 
Modified Data: is not  Zip file 
Raw Data: is not  Zip file 


# The getOnlyFolder() function returns the names of the folders present in the given directory

In [25]:
def getOnlyFolder(path):
    """Return the names of the sub-folders located directly inside ``path``.

    Parameters
    ----------
    path : str
        Directory whose immediate children are inspected.

    Returns
    -------
    list of str
        Sorted names of the entries of ``path`` that are directories.
    """
    # List the directory that was passed in (not a fixed global folder) and
    # ask the filesystem whether each entry is a directory, so dotted folder
    # names are kept and extension-less files are excluded.
    allClassName = [
        entry
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    ]
    # os.listdir order is arbitrary; sort for a reproducible result.
    allClassName.sort()
    return allClassName


In [26]:
# Show the folder names getOnlyFolder() reports for the Raw Data folder.
getOnlyFolder(rawdata)





['bending1', 'bending2', 'cycling', 'lying', 'sitting', 'standing', 'walking']

# addlebel() adds the class name as a label column (named "lebel") to each dataset

In [27]:
def addlebel(data, lebelname):
    """Store ``lebelname`` under the ``"lebel"`` key of ``data`` and return ``data``.

    The assignment happens on the object that was passed in, so the caller's
    object is modified and the very same object is returned (no copy is made).
    """
    data["lebel"] = lebelname
    return data
    

# Let's get the modified data using the getModifiedData() function. It reads every dataset from the Raw Data folder and stores the modified data inside the Modified Data folder we created above.

In [28]:
def getModifiedData():
    """Write a labelled copy of every raw file into the Modified Data folder.

    For each class folder reported by ``getOnlyFolder(rawdata)``, every file
    in that folder is read as CSV (skipping the first 4 rows), the
    ``'# Columns: time'`` column is dropped, a ``"lebel"`` column holding the
    class name is added via ``addlebel`` and the result is saved under the
    same file name in ``<modifiedData>/<class name>``.
    """
    rawClassfile=getOnlyFolder(rawdata)
    for className in rawClassfile:
        classdir = os.path.join(rawdata, className)
        # Create the per-class output folder once, not once per file.
        modifiedClassDir = createFolder(className, modifiedData)
        for csvFile in os.listdir(classdir):
            csvfilepath = os.path.join(classdir, csvFile)
            modifiedcsvPath = os.path.join(modifiedClassDir, csvFile)
            # NOTE(review): `error_bad_lines` was replaced by
            # `on_bad_lines="skip"` in newer pandas releases; switch when the
            # environment is upgraded.
            df=pd.read_csv(csvfilepath,skiprows=4,error_bad_lines=False)
            # Non-inplace drop: same result without mutating in place.
            df = df.drop(columns=['# Columns: time'])
            df = addlebel(df,className)
            # The default to_csv also writes the row index as the first column.
            df.to_csv(modifiedcsvPath)

        
# Write labelled copies of every raw file into the Modified Data folder.
getModifiedData()





b'Skipping line 485: expected 7 fields, saw 8\n'
b'Skipping line 485: expected 7 fields, saw 8\n'


# Let's get the merged data using the mergeAllClasscsv() function. It reads every dataset from the Modified Data folder and stores one merged file per class inside the Merge Data folder we created above.

In [29]:
def mergeAllClasscsv():
    """Merge each class's ``dataset*.csv`` files into one CSV per class.

    For every class folder reported by ``getOnlyFolder(modifiedData)``, the
    files matching ``dataset*.csv`` inside ``<modifiedData>/<class name>`` are
    concatenated (with a fresh index) and written to
    ``<mergedata>/<class name>.csv``. Class folders without matching files are
    skipped.
    """
    modifiedClassfile=getOnlyFolder(modifiedData)
    for className in modifiedClassfile:
        classdir = os.path.join(modifiedData, className)
        # Sorted so the row order of the merged file is reproducible.
        all_csv = sorted(glob.glob(os.path.join(classdir, "dataset*.csv")))
        if not all_csv:
            # pd.concat raises on an empty sequence; nothing to merge here.
            continue
        # Merge once per class; looping over the files again would only
        # repeat the identical read/concat/write for every file.
        # NOTE(review): `error_bad_lines` was replaced by
        # `on_bad_lines="skip"` in newer pandas releases.
        df_from_each_csv = (pd.read_csv(f, sep=',',error_bad_lines=False) for f in all_csv)
        df_merged= pd.concat(df_from_each_csv, ignore_index=True)
        df_merged.to_csv(os.path.join(mergedata, f"{className}.csv"))

        
# Build one merged CSV per class inside the Merge Data folder.
mergeAllClasscsv()





# Let's get the final data using the getFinalData() function. It reads every dataset from the Merge Data folder and stores the final data inside the Final Data folder we created above.

In [37]:
def getFinalData():
    """Concatenate every CSV in ``mergedata`` into the final dataset file.

    All ``*.csv`` files found in ``mergedata`` are read, concatenated with a
    fresh index and written to ``human_activitydata``; a confirmation message
    with that path is printed afterwards.
    """
    # Sorted so the row order of the final file is reproducible.
    all_csv = sorted(glob.glob(os.path.join(mergedata, "*.csv")))
    # NOTE(review): `error_bad_lines` was replaced by `on_bad_lines="skip"`
    # in newer pandas releases.
    df_from_each_csv = (pd.read_csv(f, sep=',',error_bad_lines=False) for f in all_csv)
    df_merged= pd.concat(df_from_each_csv, ignore_index=True)
    df_merged.to_csv(human_activitydata)
    print("Successfully we created final data for further processing and data is present inside  ", human_activitydata)
        
# Concatenate the per-class CSVs into the single final dataset file.
getFinalData()

Successfully we created final data for further processing and data is present inside   C:\Users\Soumya Ranjan\I N E U R O N P R O J E C TS\ML class project\Human Activity Prediction\Data\Final Data\human_activity.csv


In [38]:
# Preview the final dataset; note that `df` keeps only the first five rows
# returned by .head(), not the whole file.
df=pd.read_csv(human_activitydata).head()
df