# Data from Analise's Thesis

In [2]:
import os
HOME = os.path.expanduser("~")
import sys
os.chdir('..')

In [3]:
import pandas as pd
from src.plot_utils import PlotUtils
from src.analise_thesis.plotter import Plotter
from src.analise_thesis.channel_data import ChannelData
import matplotlib.pyplot as plt
from pprint import pprint
from scipy.stats import f_oneway
import scipy.stats as stats
import numpy as np

### Set general figure parameters

In [4]:
figure_store_path = os.path.join(HOME, 'data/figs_sensors_actuators')
os.makedirs(figure_store_path, mode = 0o777, exist_ok = True)
fig_format = 'svg'
tick_fontsize = 14
is_title_and_labels: bool = True
aspect = 1 # both axes are of equal length (square plot)
PlotUtils.set_user_figure_dir(user_fig_directory=figure_store_path)

In [5]:
data = [
    (896, 1, 'hard'),
    (764, 1, 'hard'),
    (608, 1, 'hard'),
    (512, 1, 'hard'),
    (764, 2, 'hard'),
    (608, 2, 'hard'),
    (512, 2, 'hard'),
    (384, 2, 'hard'),
    (288, 2, 'hard'),
    (996, 3, 'hard'),
    (800, 3, 'hard'),
    (764, 3, 'hard'),
    (764, 4, 'hard'),
    (512, 4, 'hard'),
    (384, 4, 'hard'),
    (192, 4, 'hard'),
    (720, 1, 'soft'),
    (720, 3, 'soft'),
    (720, 5, 'soft'),
    (720, 6, 'soft'),
    (960, 1, 'soft'),
    (960, 5, 'soft'),
    (960, 6, 'soft'),
    (960, 7, 'soft'),
    (840, 1, 'soft'),
    (840, 5, 'soft'),
    (840, 6, 'soft'),
    (1080, 1, 'soft'),
    (1080, 2, 'soft'),
    (1080, 3, 'soft'),
    (1080, 5, 'soft'),
    (1080, 6, 'soft'),
    (1080, 7, 'soft'),
    (600, 2, 'soft'),
    (600, 3, 'soft'),
    (600, 5, 'soft'),
    (480, 2, 'soft'),
]

In [6]:
df = pd.DataFrame()
for datum in data:
    channel_data = ChannelData(channel_width=datum[0], chip_id=datum[1], chip_type=datum[2])
    xdata, ydata, yerr = channel_data.get_data()
    df_ = pd.DataFrame({
        'width': datum[0], 'chip_id': datum[1], 'chip_type': datum[2], 'num_injections': channel_data.num_injections, 'xdata': xdata, 'ydata': ydata, 'yerr': yerr
    })
    df = pd.concat([df_, df])
df

Unnamed: 0,width,chip_id,chip_type,num_injections,xdata,ydata,yerr
0,480,2,soft,2,1,118.400,0.35460
1,480,2,soft,2,2,77.340,1.16600
0,600,5,soft,5,1,220000.000,0.00000
1,600,5,soft,5,2,1633.000,3.79300
2,600,5,soft,5,3,175.800,0.15410
...,...,...,...,...,...,...,...
0,896,1,hard,5,1,138.400,0.69690
1,896,1,hard,5,2,16.590,0.13660
2,896,1,hard,5,3,8.512,0.20160
3,896,1,hard,5,4,6.235,0.03491


In [7]:
df_last_injection = df.loc[df.xdata == df.num_injections]
df_last_injection

Unnamed: 0,width,chip_id,chip_type,num_injections,xdata,ydata,yerr
1,480,2,soft,2,2,77.34,1.166
4,600,5,soft,5,5,35.87,1.209
5,600,3,soft,6,6,28.38,2.303
2,600,2,soft,3,3,26.95,0.03176
4,1080,7,soft,5,5,15.32,0.02428
4,1080,6,soft,5,5,22.33,0.1064
4,1080,5,soft,5,5,9.375,0.05807
4,1080,3,soft,5,5,8.165,0.0271
4,1080,2,soft,5,5,8.276,0.1182
4,1080,1,soft,5,5,11.79,0.08083


#### Filter the dataframe for the largest width in each type and also get the error (standard deviation) and turn it into the variance (by squaring)

In [8]:
width_threshold = 600
soft = df_last_injection.loc[(df_last_injection.chip_type == 'soft') & (df_last_injection.width >= width_threshold)].ydata
soft_var = df_last_injection.loc[(df_last_injection.chip_type == 'soft') & (df_last_injection.width >= width_threshold)].yerr**2
hard = df_last_injection.loc[(df_last_injection.chip_type == 'hard') & (df_last_injection.width >= width_threshold)].ydata
hard_var = df_last_injection.loc[(df_last_injection.chip_type == 'hard') & (df_last_injection.width >= width_threshold)].yerr**2
# print(soft)
# print(hard)

### T-test assuming equal variances, assuming the means of the distribution underlying the samples are unequal
We use the t-test assuming equal variances. Further below we determined that we can discard the hypothesis of unequal variances.

In [12]:
print(stats.ttest_ind(soft, hard, axis=0, equal_var=True, alternative='two-sided'))

Ttest_indResult(statistic=4.280321883904488, pvalue=0.00021005775478681366)


In [13]:
stats.f_oneway(soft, hard)

F_onewayResult(statistic=18.321155429831656, pvalue=0.00021005775478681406)

In [14]:
print(f'descriptive statistics: {stats.describe(hard)}')
print(f'standard error of the mean: {stats.sem(hard)}')
print(f'descriptive statistics: {stats.describe(soft)}')
print(f'standard error of the mean: {stats.sem(soft)}')

descriptive statistics: DescribeResult(nobs=9, minmax=(4.833, 10.63), mean=7.385777777777779, variance=3.7887791944444453, skewness=0.6089396198676307, kurtosis=-0.8595909830209396)
standard error of the mean: 0.6488262217646439
descriptive statistics: DescribeResult(nobs=20, minmax=(8.165, 41.84), mean=22.469250000000002, variance=107.93507240789474, skewness=0.18284560503856587, kurtosis=-1.0719911110587048)
standard error of the mean: 2.32309139303531


### Compute F-Statistic
Computing the F-statistic is necessary to determine whether the variances in the two groups, resistances in soft and hard material, are equal.

1. Compute the variance of the samples

In [9]:
variance1 = np.var(soft, ddof=1)
variance2 = np.var(hard, ddof=1)
print(f'variance1: {variance1}, variance2: {variance2}')
f_value = variance1 / variance2
print(f'F-statistic: {f_value}')

variance1: 107.93507240789474, variance2: 3.7887791944444453
F-statistic: 28.488087288423106


2. Compute degrees of freedom for each dataset

In [10]:
deg_freedom_soft = len(soft) - 1
deg_freedom_hard = len(hard) - 1

3. Calculate p-value for F-statistic

In [11]:
p_value = stats.f.cdf(f_value, deg_freedom_hard, deg_freedom_soft)
print(p_value)

0.9999999946811363


Based on the p-value we discard the hypothesis that the variance are unequal.

### Comparing the variance after the first and the 4th injection

#### Hard chips

In [37]:
df_first_injection = df.loc[(df.xdata == 1)  & (df.width >= width_threshold) & (df.chip_type == 'hard')]
df_first_injection

Unnamed: 0,width,chip_id,chip_type,num_injections,xdata,ydata,yerr
0,764,4,hard,5,1,82.51,0.01659
0,764,3,hard,4,1,220000.0,0.0
0,800,3,hard,5,1,98.74,1.553
0,996,3,hard,5,1,134.9,0.3484
0,608,2,hard,5,1,2257.0,0.01547
0,764,2,hard,5,1,220000.0,0.0
0,608,1,hard,4,1,398.0,0.7615
0,764,1,hard,5,1,225.3,0.1587
0,896,1,hard,5,1,138.4,0.6969


In [38]:
df_first_injection.var(numeric_only=True)

width             1.504444e+04
chip_id           1.194444e+00
num_injections    1.944444e-01
xdata             0.000000e+00
ydata             9.370867e+09
yerr              2.779655e-01
dtype: float64

The variance of the electrical resistance in hard chips after the first injection is $9.4 \times 10e^9$ $K\Omega^2$.

In [39]:
df_forth_injection = df.loc[(df.xdata == 4)  & (df.width >= width_threshold) & (df.chip_type == 'hard')]
df_forth_injection

Unnamed: 0,width,chip_id,chip_type,num_injections,xdata,ydata,yerr
3,764,4,hard,5,4,23.23,0.008411
3,764,3,hard,4,4,10.14,0.1744
3,800,3,hard,5,4,6.853,0.0467
3,996,3,hard,5,4,6.265,0.0653
3,608,2,hard,5,4,11.28,0.2164
3,764,2,hard,5,4,14.23,2.101
3,608,1,hard,4,4,10.63,0.02621
3,764,1,hard,5,4,8.656,0.03024
3,896,1,hard,5,4,6.235,0.03491


In [40]:
df_forth_injection.var(numeric_only=True)

width             15044.444444
chip_id               1.194444
num_injections        0.194444
xdata                 0.000000
ydata                28.566799
yerr                  0.461080
dtype: float64

The variance of the electrical resistance in hard chips after the forth injection is 28.6 $K\Omega^2$.

#### Soft chips

In [41]:
df_first_injection = df.loc[(df.xdata == 1)  & (df.width >= width_threshold) & (df.chip_type == 'soft')]
df_first_injection

Unnamed: 0,width,chip_id,chip_type,num_injections,xdata,ydata,yerr
0,600,5,soft,5,1,220000.0,0.0
0,600,3,soft,6,1,8674.0,997.1
0,600,2,soft,3,1,42300.0,7077.0
0,1080,7,soft,5,1,45749.0,1192.0
0,1080,6,soft,5,1,220000.0,0.0
0,1080,5,soft,5,1,783.1,8.361
0,1080,3,soft,5,1,3982.0,63.82
0,1080,2,soft,5,1,220000.0,0.0
0,1080,1,soft,5,1,220000.0,0.0
0,840,6,soft,5,1,108098.0,368.4


In [42]:
df_first_injection.var(numeric_only=True)

width             3.198316e+04
chip_id           4.526316e+00
num_injections    3.052632e-01
xdata             0.000000e+00
ydata             9.645533e+09
yerr              7.667589e+06
dtype: float64

The variance of the electrical resistance in hard chips after the forth injection is $9.65 \times 10^9$ $K\Omega^2$.

In [43]:
df_forth_injection = df.loc[(df.xdata == 4)  & (df.width >= width_threshold) & (df.chip_type == 'soft')]
df_forth_injection

Unnamed: 0,width,chip_id,chip_type,num_injections,xdata,ydata,yerr
3,600,5,soft,5,4,56.46,0.09612
3,600,3,soft,6,4,31.96,0.1567
3,1080,7,soft,5,4,28.37,0.07466
3,1080,6,soft,5,4,37.51,0.05456
3,1080,5,soft,5,4,404.3,35.28
3,1080,3,soft,5,4,14.73,0.1829
3,1080,2,soft,5,4,9.813,0.02883
3,1080,1,soft,5,4,33.73,0.07072
3,840,6,soft,5,4,35.23,0.03325
3,840,5,soft,5,4,53.2,0.1883


In [45]:
df_forth_injection.var(numeric_only=True)

width             29305.263158
chip_id               4.543860
num_injections        0.111111
xdata                 0.000000
ydata              7297.950068
yerr                 65.078020
dtype: float64

The variance of the electrical resistance in soft chips after the forth injection is 7298 $K\Omega^2$.