In [3]:
import pandas as pd
import pathlib
import glob

In [5]:
path = pathlib.Path('/Users/nilsjennissen/Documents/02_PRIVAT/01_STUDIUM/01_BTS/04_Data_Science_Foundations')
df = pd.DataFrame(path.rglob('*'))
df = df.rename(columns={0: 'Filepath'})

# Create a column with the filename
df['Filename'] = df['Filepath'].apply(lambda x: x.name)
# Create a column with the file extension
df['Fileextension'] = df['Filepath'].apply(lambda x: x.suffix)
# Create a column with the file size
df['Filesize'] = df['Filepath'].apply(lambda x: x.stat().st_size)
# Create a column with the file creation date
df['Filecreationdate'] = df['Filepath'].apply(lambda x: x.stat().st_ctime)
# Create a column with the file modification date
df['Filemodificationdate'] = df['Filepath'].apply(lambda x: x.stat().st_mtime)

# - Data Transformation -
# Transform date to datetime
df['Filecreationdate'] = pd.to_datetime(df['Filecreationdate'], unit='s')
df['Filemodificationdate'] = pd.to_datetime(df['Filemodificationdate'], unit='s')

In [6]:
df.head()

Unnamed: 0,Filepath,Filename,Fileextension,Filesize,Filecreationdate,Filemodificationdate
0,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment8Bacteria,,288,2022-12-01 17:19:26.951033344,2022-12-01 17:16:31.000000000
1,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment1,,544,2023-05-10 23:11:12.736709120,2023-05-10 23:11:08.000000000
2,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,.DS_Store,,12292,2023-04-30 01:51:21.110024704,2023-04-30 01:51:21.110024704
3,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture2,,320,2022-11-29 13:31:40.031290368,2022-11-29 13:31:39.000000000
4,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture5,,384,2023-02-01 14:51:31.832541184,2023-02-01 14:51:26.000000000


In [7]:
# Create column for files that have been modified this week
df['Modifiedthisweek'] = df['Filemodificationdate'].apply(lambda x: 'This week!' if x > pd.Timestamp.now() - pd.Timedelta(days=7) else 'Older')
df.head()

Unnamed: 0,Filepath,Filename,Fileextension,Filesize,Filecreationdate,Filemodificationdate,Modifiedthisweek
0,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment8Bacteria,,288,2022-12-01 17:19:26.951033344,2022-12-01 17:16:31.000000000,Older
1,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment1,,544,2023-05-10 23:11:12.736709120,2023-05-10 23:11:08.000000000,This week!
2,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,.DS_Store,,12292,2023-04-30 01:51:21.110024704,2023-04-30 01:51:21.110024704,Older
3,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture2,,320,2022-11-29 13:31:40.031290368,2022-11-29 13:31:39.000000000,Older
4,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture5,,384,2023-02-01 14:51:31.832541184,2023-02-01 14:51:26.000000000,Older


In [8]:
# Transform Filecreationdate to date format
df['Filecreationdate'] = df['Filecreationdate'].dt.date
df['Filemodificationdate'] = df['Filemodificationdate'].dt.date

df.head()

Unnamed: 0,Filepath,Filename,Fileextension,Filesize,Filecreationdate,Filemodificationdate,Modifiedthisweek
0,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment8Bacteria,,288,2022-12-01,2022-12-01,Older
1,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment1,,544,2023-05-10,2023-05-10,This week!
2,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,.DS_Store,,12292,2023-04-30,2023-04-30,Older
3,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture2,,320,2022-11-29,2022-11-29,Older
4,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture5,,384,2023-02-01,2023-02-01,Older


In [9]:
# Create column for file or folder when Fileextension is empty
df['Fileorfolder'] = df['Fileextension'].apply(lambda x: 'Folder' if x == '' else 'File')
df.head()

Unnamed: 0,Filepath,Filename,Fileextension,Filesize,Filecreationdate,Filemodificationdate,Modifiedthisweek,Fileorfolder
0,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment8Bacteria,,288,2022-12-01,2022-12-01,Older,Folder
1,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment1,,544,2023-05-10,2023-05-10,This week!,Folder
2,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,.DS_Store,,12292,2023-04-30,2023-04-30,Older,Folder
3,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture2,,320,2022-11-29,2022-11-29,Older,Folder
4,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture5,,384,2023-02-01,2023-02-01,Older,Folder


In [10]:
# Create column for hidden or not hidden when Filename starts with a dot
df['Hidden'] = df['Filename'].apply(lambda x: 'Hidden' if x.startswith('.') else 'Not hidden')
df.head()

Unnamed: 0,Filepath,Filename,Fileextension,Filesize,Filecreationdate,Filemodificationdate,Modifiedthisweek,Fileorfolder,Hidden
0,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment8Bacteria,,288,2022-12-01,2022-12-01,Older,Folder,Not hidden
1,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment1,,544,2023-05-10,2023-05-10,This week!,Folder,Not hidden
2,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,.DS_Store,,12292,2023-04-30,2023-04-30,Older,Folder,Hidden
3,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture2,,320,2022-11-29,2022-11-29,Older,Folder,Not hidden
4,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture5,,384,2023-02-01,2023-02-01,Older,Folder,Not hidden


In [11]:
# Get the parent folder name cut from the filepath
df['Parentfolder'] = df['Filepath'].apply(lambda x: x.parent.name)
df.head()

Unnamed: 0,Filepath,Filename,Fileextension,Filesize,Filecreationdate,Filemodificationdate,Modifiedthisweek,Fileorfolder,Hidden,Parentfolder
0,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment8Bacteria,,288,2022-12-01,2022-12-01,Older,Folder,Not hidden,04_Data_Science_Foundations
1,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment1,,544,2023-05-10,2023-05-10,This week!,Folder,Not hidden,04_Data_Science_Foundations
2,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,.DS_Store,,12292,2023-04-30,2023-04-30,Older,Folder,Hidden,04_Data_Science_Foundations
3,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture2,,320,2022-11-29,2022-11-29,Older,Folder,Not hidden,04_Data_Science_Foundations
4,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture5,,384,2023-02-01,2023-02-01,Older,Folder,Not hidden,04_Data_Science_Foundations


In [5]:
# Combining all of this in a function
def get_csv(path):
    df = pd.DataFrame(path.rglob('*'))
    df = df.rename(columns={0: 'Filepath'})
    df['Filename'] = df['Filepath'].apply(lambda x: x.name)
    df['Fileextension'] = df['Filepath'].apply(lambda x: x.suffix)
    df['Filesize'] = df['Filepath'].apply(lambda x: x.stat().st_size)
    df['Filecreationdate'] = df['Filepath'].apply(lambda x: x.stat().st_ctime)
    df['Filemodificationdate'] = df['Filepath'].apply(lambda x: x.stat().st_mtime)
    df['Filecreationdate'] = pd.to_datetime(df['Filecreationdate'], unit='s')
    df['Filemodificationdate'] = pd.to_datetime(df['Filemodificationdate'], unit='s')
    df['Modifiedthisweek'] = df['Filemodificationdate'].apply(lambda x: 'This week!' if x > pd.Timestamp.now() - pd.Timedelta(days=7) else 'Older')
    df['Filecreationdate'] = df['Filecreationdate'].dt.date
    df['Filemodificationdate'] = df['Filemodificationdate'].dt.date
    df['Fileorfolder'] = df['Fileextension'].apply(lambda x: 'Folder' if x == '' else 'File')
    df['Hidden'] = df['Filename'].apply(lambda x: 'Hidden' if x.startswith('.') else 'Not hidden')
    df['Parentfolder'] = df['Filepath'].apply(lambda x: x.parent.name)
    return df


In [6]:
# Test the function
path = pathlib.Path('/Users/nilsjennissen/Documents/02_PRIVAT/01_STUDIUM/01_BTS/04_Data_Science_Foundations')
df = get_csv(path)
df.head()

Unnamed: 0,Filepath,Filename,Fileextension,Filesize,Filecreationdate,Filemodificationdate,Modifiedthisweek,Fileorfolder,Hidden,Parentfolder
0,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment8Bacteria,,288,2022-12-01,2022-12-01,Older,Folder,Not hidden,04_Data_Science_Foundations
1,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Assignment1,,544,2023-05-10,2023-05-10,This week!,Folder,Not hidden,04_Data_Science_Foundations
2,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,.DS_Store,,12292,2023-04-30,2023-04-30,Older,Folder,Hidden,04_Data_Science_Foundations
3,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture2,,320,2022-11-29,2022-11-29,Older,Folder,Not hidden,04_Data_Science_Foundations
4,/Users/nilsjennissen/Documents/02_PRIVAT/01_ST...,Lecture5,,384,2023-02-01,2023-02-01,Older,Folder,Not hidden,04_Data_Science_Foundations
