# Title: Wrangling csv files



## Preliminaries

In [9]:
# Auto-reload imported modules in the Jupyter notebook, so that changes to *.py files don't require manual reloading:
# https://stackoverflow.com/questions/5364050/reloading-submodules-in-ipython
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Import commonly used libraries and enable inline plotting with a magic command.

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [11]:
%matplotlib inline

## Work

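This is where work gets done: the CSV logs in `data/` are combined into a single Excel workbook (`BCM.xlsx`), one sheet per file, and summary formulas are then added to each sheet.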

In [12]:
#1 Combine CSV Files into an Excel Workbook
#
# Reads every *.csv file in `logs_folder` (no header row; columns are taken to be
# datetime, scale, temperature) and writes each one to its own sheet of
# `output_file`, named after the CSV file without its extension.
import pandas as pd
from pathlib import Path
from openpyxl import Workbook, load_workbook  # load_workbook is used by the next cell

# Define the path to the logs folder and the output Excel file
logs_folder = Path("data")
output_file = "BCM.xlsx"

# Build the workbook in memory and write it to disk once at the end, instead of
# re-loading and re-saving the growing file for every CSV.
# The default blank sheet ("Sheet") is kept on purpose: the next cell skips it by name.
wb = Workbook()

# sorted() makes the sheet order reproducible; glob() order depends on the filesystem.
for csv_file in sorted(logs_folder.glob("*.csv")):
    # Parse the first column as real datetimes so that Excel stores date values
    # (MIN/MAX formulas over text cells evaluate to 0). If the column cannot be
    # parsed, pandas leaves it as plain strings.
    log_df = pd.read_csv(
        csv_file,
        header=None,
        names=['datetime', 'scale', 'temperature'],
        parse_dates=['datetime'],
    )

    # Excel cannot store timezone-aware datetimes; drop the timezone, keep the wall time.
    if isinstance(log_df['datetime'].dtype, pd.DatetimeTZDtype):
        log_df['datetime'] = log_df['datetime'].dt.tz_localize(None)

    # Turn missing values (NaN / NaT) into None so they are written as empty cells.
    log_df = log_df.astype(object).where(log_df.notna(), None)

    # Add a new sheet with the name of the CSV file (without extension)
    ws = wb.create_sheet(title=csv_file.stem)

    # Write the DataFrame to the new sheet, one worksheet row per record
    for record in log_df.itertuples(index=False, name=None):
        ws.append(record)

# Save the workbook (single write)
wb.save(output_file)


In [None]:
#2 Add Formulas to Each Sheet
#
# For every data sheet in `output_file`: make sure row 1 holds the column headers,
# then write MIN/MAX/AVERAGE formulas for the temperature column (C) and MIN/MAX
# formulas for the datetime column (A) into columns G:H.
# The cell is safe to re-run: the header row is only inserted when it is missing.
from openpyxl.utils.dataframe import dataframe_to_rows  # not used in this cell; kept for later cells

# Load the workbook
wb = load_workbook(output_file)

headers = ('datetime', 'scale', 'temperature')

# Add formulas to each sheet
for sheet in wb.sheetnames:
    if sheet == 'Sheet':  # Skip the initial blank sheet
        continue
    ws = wb[sheet]

    # Add headers. openpyxl rows are 1-based, so the new row goes in at index 1.
    # Skip the insert when the header is already there (e.g. the cell was re-run),
    # otherwise a second header row would end up inside the data range.
    first_row = tuple(ws.cell(row=1, column=col).value for col in (1, 2, 3))
    if first_row != headers:
        ws.insert_rows(1)
        ws['A1'], ws['B1'], ws['C1'] = headers

    # Determine the range of data from columns A:C only, so that the summary cells
    # in G:H (present after a previous run) never stretch the range.
    max_row = max(
        (
            cell.row
            for data_row in ws.iter_rows(min_col=1, max_col=3)
            for cell in data_row
            if cell.value is not None
        ),
        default=1,
    )
    max_row = max(max_row, 2)  # keep the ranges well-formed even for a sheet without data

    # Add formulas for min, max, average temperature
    ws['G2'], ws['G3'], ws['G4'] = 'Min Temperature', 'Max Temperature', 'Avg Temperature'
    ws['H2'] = f"=MIN(C2:C{max_row})"
    ws['H3'] = f"=MAX(C2:C{max_row})"
    ws['H4'] = f"=AVERAGE(C2:C{max_row})"

    # Add formulas for min, max datetime
    ws['G6'], ws['G7'] = 'Min Datetime', 'Max Datetime'
    ws['H6'] = f"=MIN(A2:A{max_row})"
    ws['H7'] = f"=MAX(A2:A{max_row})"
    # MIN/MAX over date cells yield Excel serial numbers; show them as datetimes.
    ws['H6'].number_format = 'yyyy-mm-dd hh:mm:ss'
    ws['H7'].number_format = 'yyyy-mm-dd hh:mm:ss'

# Save the workbook with formulas
wb.save(output_file)
