# Importing needed libraries

In [None]:
import os, time
import pandas as pd
from datetime import datetime 
from stat import ST_SIZE, ST_MTIME

# Defining Paths 

In [None]:
# Root folder whose contents will be inventoried
path = "C:\\temp\\Instagram"
# Working directory at the time this cell runs
path_raiz = os.getcwd()
# Name of a text output file (not referenced in the visible part of the file)
output_file = "lista_de_arquivos.txt"

# Creating auxiliary variables

In [None]:
# Entries (files and folders) found directly under the root folder
pastas = os.listdir(path)
# Running count of files recorded so far
total = 0

# Accumulators: one tuple per file found, and the entries that failed
lista_arquivos = []
erros = []

# Defining Functions

In [None]:
def busca_info_arquivo(file_path):
    """
    Return the size and last-modification date of a file.

    Despite the ``data_criacao`` name, the date comes from the file's
    modification time (``ST_MTIME``), not from a creation timestamp.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        A ``(tamanho, data_criacao)`` tuple: the size in mebibytes (float)
        and the modification date as a ``datetime.date`` in local time.

    Raises:
        OSError: If ``os.stat`` cannot access ``file_path``.
    """
    st = os.stat(file_path)
    # Convert the timestamp directly. Round-tripping through an asctime()
    # string and strptime('%a %b ...') depends on the locale's day and
    # month names and does needless string work.
    data_criacao = datetime.fromtimestamp(st[ST_MTIME]).date()
    # Bytes -> MiB
    tamanho = st[ST_SIZE] / 1024 / 1024

    return tamanho, data_criacao

def listar_pasta(pasta):
    """
    Record the files directly under ``pasta`` and queue its subfolders.

    ``pasta`` is resolved against the module-level ``path``. The function
    does not recurse by itself: every subdirectory found is appended (as a
    full path) to the module-level ``pastas`` list, so a caller iterating
    over ``pastas`` reaches it later. If ``pasta`` is not a directory it is
    treated as a single file located directly in ``path``.

    Each file is appended to the module-level ``lista_arquivos`` as a
    ``(folder, name, size, date)`` tuple, with size and date taken from
    ``busca_info_arquivo``.

    Args:
        pasta: Entry name relative to ``path``, or a full path previously
            queued by this function.

    Returns:
        Number of files recorded by this call.
    """
    tot = 0
    # Resolve the entry once instead of re-joining it for every use
    pasta_completa = os.path.join(path, pasta)

    if os.path.isdir(pasta_completa):
        for item in os.listdir(pasta_completa):
            novo_item = os.path.join(pasta_completa, item)
            # Subfolders are queued for a later pass rather than walked here
            if os.path.isdir(novo_item):
                pastas.append(novo_item)
                continue
            # novo_item is already the file's full path; joining path and
            # pasta onto it again only worked when path was absolute.
            tamanho, data_criacao = busca_info_arquivo(novo_item)
            lista_arquivos.append((pasta_completa, item, tamanho, data_criacao))
            tot += 1
    else:
        # The entry is a file sitting directly in the root folder
        tamanho, data_criacao = busca_info_arquivo(pasta_completa)
        lista_arquivos.append((path, pasta, tamanho, data_criacao))
        tot += 1

    return tot

# Creating Main function

In [None]:
# Main function to analyze the directory and its subdirectories
if __name__ == '__main__':

    # pastas grows while it is being iterated: listar_pasta appends every
    # subfolder it finds, so this loop ends up visiting the whole tree.
    for pasta in pastas:
        try:
            total += listar_pasta(pasta)
        # Best effort: record entries that could not be read (e.g. paths too
        # long for Windows) and keep going. Catching Exception rather than
        # using a bare except lets Ctrl+C / SystemExit still stop the scan.
        except Exception:
            erros.append(pasta)
    print("Total de arquivos encontrados: ", total)

# Checking

In [None]:
# Show up to ten entries of the file list, then up to ten of the error list
for titulo, amostra in (('Files', lista_arquivos[:10]), ('\nErros\n', erros[:10])):
    print(titulo)
    for item in amostra:
        print(item)

# Creating Dataframe

In [None]:
# Build a pandas DataFrame from the collected file tuples

# Column names, in the same order as the fields of each tuple
indice = ('pasta', 'arquivo', 'tamanho', 'data_criacao')

# Construct the frame and order it by folder, then by file name
df = pd.DataFrame(data=lista_arquivos, columns=indice).sort_values(by=['pasta', 'arquivo'])
df.head()

# Exporting CSV

In [None]:
# Save the DataFrame to a CSV file named df_arquivos_<YYYYMMDD>.csv
dt_temp = time.localtime()
# strftime zero-pads both month and day, so the stamp is always 8 digits
# (the previous concatenation left the day unpadded, e.g. "2024019").
dt_hoje = time.strftime('%Y%m%d', dt_temp)
nome_arquivo = os.path.join(path_raiz, 'df_arquivos_' + dt_hoje + '.csv')

# Semicolon separator and comma decimal mark.
# NOTE(review): latin-1 cannot encode every character; file names outside
# that range would presumably make this call fail - confirm with real data.
df.to_csv(nome_arquivo, encoding='latin-1', sep=';', decimal=',', index=False)