# Spring's constant measuring

## Importing libraries

In [82]:
import numpy as np
import matplotlib.pyplot as plt
import scipy
import pandas as pd
import os
from functools import reduce

from modules import LinearRegression

## Load data

Given a CSV file, use `pandas` to read the data into a table. Then make sure the entries have the correct `dtype` and truncate excess data.

In [83]:
# collect filenames of exported data

data_dir = "./data"

csv_data = []
excel_data = []

for root, dirs, files in os.walk(data_dir):
    for file in files:
        current_file_path = os.path.join(root, file).replace("\\", "/") # fix the unbearably frustrating flaws of the wanna-be OS... "Windows"
        if "xlsx" in file:
            excel_data.append(current_file_path)
        elif "csv" in file:
            csv_data.append(current_file_path)
    break # stop at first recursion level: only ./data

print(f"CSV:\n{csv_data}\nEXCEL:\n{excel_data}")

CSV:
['./data/sonar_non-pretensionata_statico.csv', './data/sonar_non-pretensionata_dinamico.csv', './data/sonar_pretensionata_dinamico.csv', './data/sonar_pretensionata_statico.csv']
EXCEL:
['./data/masse_sonar_non-pretensionata.xlsx', './data/dati laboratorio.xlsx', './data/misure_masse.xlsx', './data/calibro_non-pretensionata_statico.xlsx', './data/masse_sonar_pretensionata.xlsx', './data/calibro_pretensionata_statico.xlsx']


In [84]:
def read_sonar_data(filename: str):
    data = pd.read_csv(
            "Misure Molla1 Sensore (bene).csv", sep=";"
        ).replace(",", ".", regex=True)

    data.dropna(inplace=True)
    data.drop(index=data.index[0], axis=0, inplace=True)

    # data.convert_dtypes()
    for col in data.columns:
        data[col] = data[col].apply(pd.to_numeric)

    return data

def read_excel_data(filename: str):
    pass



datasets = dict()

for file in csv_data + excel_data:
   datasets[os.path.basename(file).split('.')[0]] = read_sonar_data(file)

print(datasets.keys())
print(datasets['misure_masse'])

dict_keys(['sonar_non-pretensionata_statico', 'sonar_non-pretensionata_dinamico', 'sonar_pretensionata_dinamico', 'sonar_pretensionata_statico', 'masse_sonar_non-pretensionata', 'dati laboratorio', 'misure_masse', 'calibro_non-pretensionata_statico', 'masse_sonar_pretensionata', 'calibro_pretensionata_statico'])


In [85]:
"""masse = pd.read_csv("Masse").convert_dtypes().to_dict()

print(masse)"""

'masse = pd.read_csv("Masse").convert_dtypes().to_dict()\n\nprint(masse)'

In [86]:
# TODO: make this a function that takes the filename as input and only outputs the DataFrame, then truncate and convert only when/where needed in a separate part of the code.
# TODO: write a portion of code that handles all the different data files and calls the right functions to anylize their data.

data = pd.read_csv("Misure Molla1 Sensore (bene).csv", sep=";").replace(
    ",", ".", regex=True
)

data.dropna(inplace=True)
data.drop(index=data.index[0], axis=0, inplace=True)

# data.convert_dtypes()
for col in data.columns:
    data[col] = data[col].apply(pd.to_numeric)

data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1029 entries, 1 to 1029
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Position (mm) Run #1  1029 non-null   float64
 1   Position (mm) Run #2  1029 non-null   float64
 2   Position (mm) Run #3  1029 non-null   float64
 3   Position (mm) Run #4  1029 non-null   float64
 4   Position (mm) Run #5  1029 non-null   float64
 5   Position (mm) Run #6  1029 non-null   float64
 6   Position (mm) Run #7  1029 non-null   float64
dtypes: float64(7)
memory usage: 64.3 KB


## Clean data

In [87]:
# this is only needed for the data acquired with the sonar

new_column_names = {f"Position (mm) Run #{i+1}": f"Run{i+1}(mm)" for i in range(0, 7)}
data = data.rename(columns=new_column_names)
del new_column_names

In [88]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1029 entries, 1 to 1029
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Run1(mm)  1029 non-null   float64
 1   Run2(mm)  1029 non-null   float64
 2   Run3(mm)  1029 non-null   float64
 3   Run4(mm)  1029 non-null   float64
 4   Run5(mm)  1029 non-null   float64
 5   Run6(mm)  1029 non-null   float64
 6   Run7(mm)  1029 non-null   float64
dtypes: float64(7)
memory usage: 64.3 KB


In [89]:
data.head()

Unnamed: 0,Run1(mm),Run2(mm),Run3(mm),Run4(mm),Run5(mm),Run6(mm),Run7(mm)
1,0.34,-12.56,-25.28,-38.01,-50.88,-63.96,-76.88
2,0.34,-12.56,-25.28,-37.98,-51.08,-64.16,-76.88
3,0.37,-12.53,-25.28,-38.18,-51.06,-64.16,-76.86
4,0.17,-12.73,-25.25,-38.18,-51.26,-64.13,-77.06
5,0.17,-12.73,-25.46,-38.18,-51.23,-64.33,-77.06


In [90]:
data.tail()

Unnamed: 0,Run1(mm),Run2(mm),Run3(mm),Run4(mm),Run5(mm),Run6(mm),Run7(mm)
1025,0.34,-12.73,-25.46,-38.36,-51.6,-64.33,-77.23
1026,0.31,-12.73,-25.46,-38.36,-51.6,-64.33,-77.23
1027,0.52,-12.73,-25.46,-38.36,-51.57,-64.33,-77.23
1028,0.52,-12.73,-25.43,-38.36,-51.77,-64.33,-77.23
1029,0.52,-12.73,-25.63,-38.36,-51.77,-64.33,-77.23


## Analyze data

For the **static method**, the sonar's signal is hypothesised to have a very small amplitude, so the measure of the mass' displacement is the mean of the recorded positions.<BR><BR>
For the **dynamic method**, we isolate the peaks of the oscillating signal and compute the mean of the period between two adjacent peaks. From there the resulting period is used to approximate the frequency of the oscillation.

In [91]:
# Analyze static method

averages = np.zeros(7)
for run in range(7):
    averages[run] += np.mean(data[f"Run{run+1}(mm)"])

averages = np.round_(averages, decimals=2)
print(averages)

# k = 9.81 * (masse[-1] - masse[0]) / (averages[-1] - averages[0])

[  0.19 -12.71 -25.71 -38.34 -51.45 -64.31 -77.2 ]


In [92]:
# Analyze dynamic method

def discrete_derivative(domain: list, curve: list):
    d = [0]
    for i in range(1, len(curve)):
        d.append((curve[i] - curve[i - 1]) / (domain[i] - domain[i - 1]))
    return d


# windowed peaks isolation
def windowed_peaks(signal: list):
    peaks = []
    for i in range(2, len(signal) - 2, 4):
        if (
            reduce(
                lambda x, y: x * y,
                discrete_derivative(list(range(i - 2, i + 3)), signal[i - 2 : i + 3]),
            )
            < 0
        ):
            peaks.append(max(signal[i - 2 : i + 3]))
    return peaks

In [93]:
def calculate_mean_period(column_id: str):
    wf = data[column_id].array
    for v in wf:
        print(windowed_peaks(wf))


calculate_mean_period(data[data.columns[0]])

# for col in data.columns:
# calculate_mean_period(col)

KeyError: "None of [Index([0.34, 0.34, 0.37, 0.17, 0.17, 0.17, 0.17, 0.17,  0.2,  0.0,\n       ...\n       0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.31, 0.52, 0.52, 0.52],\n      dtype='float64', length=1029)] are in the [columns]"