In [1]:
import dask.dataframe as dd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import pandas as pd
from datetime import datetime
import os
def euclid(df):
    return np.sqrt((df['centroid_x'] - df['centroid_x'].shift(1))**2 + 
                   (df['centroid_y'] - df['centroid_y'].shift(1))**2
)

In [2]:
pth = Path(r'/home/oldboy/Documents/GitHub/thermal tognini/thermal_data')

content = os.listdir(pth)
content = list(filter(lambda x: (pth/x).is_dir(), content ) ) 

subjects = dict()
for c in content:
    parts = c.split('-')
    sub_id = parts[2]
    if sub_id not in subjects.keys():
        subjects[sub_id] = dict()
        subjects[sub_id]['id'] = parts[2] 
        subject_parts = subjects[sub_id]['id'].split('_')
        subjects[sub_id]['geno'] = subject_parts[0]
        subjects[sub_id]['number'] = subject_parts[1]
        subjects[sub_id]['recordings'] = [(pth/c/'data.csv').as_posix()]
    else:
        subjects[sub_id]['recordings'].append( (pth/c/'data.csv').as_posix() )

subjects = pd.DataFrame.from_dict(subjects).T.reset_index(drop=True)
subjects['recordings'] = subjects['recordings'].apply(sorted)


In [52]:

for i,row in subjects.iterrows():
    if i==6:
        break

data = list()
for i,rec in enumerate(row['recordings']):
    df = dd.read_csv(rec,sep=';', skiprows=1,dtype={'isDay': 'float64'}, parse_dates=['Date'])
    df['temp_med_delta'] = (df['temp_med']-df['temp_med'].mean())
    df['distance'] = euclid(df)
    df['RT_delta'] = df['RT']-df['RT'].mean()
    data.append(df)

data = dd.concat(data)
data['start_date'] = data['Date'].min()
data['day'] = (data['Date'] - data['start_date']).dt.days + 1
data = data.groupby(['minute','day']).mean().compute()
data['temp_rt_delta'] = data['temp_avg']-data['RT']
data['temp_rt_delta'] = data['temp_rt_delta'] - data['temp_rt_delta'].mean()
data['temp_norm'] = data['temp_avg']-data['temp_avg'].mean()
data['RT_norm'] = data['RT']-data['RT'].mean()
data['temp_rt_correct'] = data['temp_norm']-data['RT_norm']

#min_date = data['Date'].min().compute()
#data = data.set_index('Date')
#data = data.assign(data_rel = (data['Date'] - min_date).dt.days.compute() ) 
#data.head()



In [53]:
data.reset_index().loc[1430:1450]

Unnamed: 0,minute,day,ID,timeStamp,hour,RT,isDay,temp_avg,temp_med,temp_max,centroid_x,centroid_y,temp_med_delta,distance,RT_delta,temp_rt_delta,temp_norm,RT_norm,temp_rt_correct
1430,1424,1,419277.5,476317.531743,23.0,23.985465,0.0,28.776408,28.776408,31.064431,41.888358,52.438744,1.571694,1.867469,0.470411,1.43317,1.889134,0.455963,1.43317
1431,1425,1,419780.5,476377.560235,23.0,23.958066,0.0,28.608056,28.608056,30.927097,46.650149,73.180894,1.403342,2.266135,0.443013,1.292217,1.720782,0.428565,1.292217
1432,1426,1,420285.5,476437.51847,23.0,23.926526,0.0,28.752369,28.752369,31.086135,38.08613,59.152929,1.547655,1.45819,0.411473,1.46807,1.865095,0.397025,1.46807
1433,1427,1,420794.0,476497.597559,23.0,23.890227,0.0,28.469829,28.469829,30.606418,39.100391,62.851642,1.265114,2.044352,0.375174,1.221828,1.582554,0.360726,1.221828
1434,1428,1,421302.0,476557.509838,23.0,23.851084,0.0,28.826136,28.826136,31.195985,41.37148,61.91286,1.621422,1.73891,0.336031,1.617279,1.938861,0.321583,1.617279
1435,1429,1,421809.5,476617.463066,23.0,23.808192,0.0,28.271877,28.271877,30.214434,51.307904,98.470162,1.067163,0.561198,0.293138,1.105912,1.384602,0.27869,1.105912
1436,1430,1,422316.0,476677.356255,23.0,23.773753,0.0,28.136964,28.136964,30.00313,60.562221,102.518459,0.93225,0.191677,0.2587,1.005438,1.24969,0.244252,1.005438
1437,1431,1,422819.5,476737.601221,23.0,23.799933,0.0,28.065525,28.065525,29.926337,60.905838,104.323225,0.86081,0.212968,0.28488,0.907818,1.17825,0.270432,0.907818
1438,1432,1,423321.5,476797.596167,23.0,23.814149,0.0,27.89522,27.89522,29.929871,68.442647,87.079832,0.690505,0.763175,0.299096,0.723297,1.007945,0.284648,0.723297
1439,1433,1,423825.5,476857.485021,23.0,23.826267,0.0,27.762913,27.762913,29.941143,62.942946,81.34211,0.558199,1.533953,0.311213,0.578873,0.875639,0.296765,0.578873


In [54]:
%matplotlib notebook
fig,ax = plt.subplots(2,1)

temperature = data['temp_rt_delta'].reset_index(drop=True).rolling(5,min_periods=1,center=True).median()
ax[0].plot(temperature)
ax[0].axhline(0,color='k',linestyle='--')
lims = ax[0].get_ylim()
ax[0].fill_between(temperature.index, lims[0], lims[1],where= data['isDay'].reset_index(drop=True)<.00001,alpha=.2)

motion = data['distance'].reset_index(drop=True).rolling(5,min_periods=1,center=True).median()
ax[1].plot(motion)
lims = ax[1].get_ylim()
ax[1].fill_between(temperature.index, lims[0], lims[1],where= data['isDay'].reset_index(drop=True)<.00001,alpha=.2)


<IPython.core.display.Javascript object>

<matplotlib.collections.PolyCollection at 0x7fb70ae67850>

In [55]:
# periodogramma che computa i picchi di oscillazione

import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
from statsmodels.stats.multitest import fdrcorrection

# Generate a sample signal
fs = 60  # Sample rate (Hz) - equivalent to 1 hour
x = temperature

# Compute the periodogram using the Welch method
#frequencies, power_spectrum = signal.welch(x, fs, nperseg=1440*3)
frequencies, power_spectrum = signal.periodogram(x, fs)

period = 1 / frequencies

sel = period <= 96
power_spectrum = power_spectrum[sel]
period = period[sel]

#p, _ = signal.find_peaks(power_spectrum)
#threshold = np.mean(power_spectrum[p]) * (np.std(power_spectrum[p]) * 35)

#peaks, _ = signal.find_peaks(power_spectrum, height=threshold)
#peak_amplitudes = power_spectrum[peaks]
#sorted_indices = np.argsort(peak_amplitudes)[::-1]  # Sort indices in descending order
p_values = 1 - power_spectrum  # Convert power values to p-values
rejected, _ = fdrcorrection(p_values)
#threshold = np.max(power_spectrum[rejected])

# Extract the sorted peaks and amplitudes
sorted_peaks = period[rejected]
sorted_amplitudes = power_spectrum[rejected]
peaks_signi = pd.DataFrame({'Period (hours)': sorted_peaks, 'Amplitude': sorted_amplitudes})

# Plot the periodogram
plt.figure()
plt.plot(period, power_spectrum)
#plt.axhline(threshold, color='k', linestyle='--')
plt.plot(period[rejected], power_spectrum[rejected], 's')

plt.xlabel('Period (Hours)')
plt.ylabel('Power Spectral Density')
plt.title('Periodogram')


peaks_signi

  period = 1 / frequencies


<IPython.core.display.Javascript object>

Unnamed: 0,Period (hours),Amplitude
0,84.016667,1.689166
1,42.008333,1.045579
2,28.005556,3.729761
3,24.004762,11.402238
4,12.002381,3.425564
5,8.001587,2.461256
6,4.800952,2.349731
7,3.429252,2.20721
8,2.897126,1.116856
9,2.182251,1.386206


In [12]:
power

NameError: name 'power' is not defined

In [16]:
import numpy as np
import pandas as pd
from scipy.signal import periodogram,fft
from scipy.stats import circmean
from statsmodels.stats.multitest import fdrcorrection

fs = 60  # Sampling frequency (1 minute)
t=temperature.index.values

# Compute the periodogram of the temperature signal
frequencies, power = periodogram(temperature, fs)
spectrum = fft.fft(temperature)
phase_spectrum = np.angle(spectrum)

# Perform FDR correction to adjust p-values
p_values = 1 - power  # Convert power values to p-values
rejected, _ = fdrcorrection(p_values)

# Set the threshold based on the corrected p-values
#threshold = np.max(power[rejected])

# Find frequencies above the threshold
significant_frequencies = frequencies[rejected] #power >= threshold]

# Calculate the period (in hours) of the significant frequencies
periods = 1 / significant_frequencies #* 60  # Convert to hours

# Calculate the acrophase (in hours) of the significant frequencies
acrophases = np.arccos(np.cos(2 * np.pi * significant_frequencies * t.mean())) / (2 * np.pi) * 24

# Calculate the mesor (mean value) of the temperature signal
mesor = np.mean(temperature)

# Create a pandas DataFrame to store the data of the significant components
data = pd.DataFrame({
    'Period (hours)': periods,
    'Acrophase (hours)': acrophases,
})

# Print the mesor and significant components
print("Mesor:", mesor)
print("Significant Components:")
print(data)


ImportError: cannot import name 'fft' from 'scipy.signal' (/home/oldboy/anaconda3/lib/python3.9/site-packages/scipy/signal/__init__.py)

In [78]:
import numpy as np
import pandas as pd
from astropy.timeseries import LombScargle

# Generate a sample signal with multiple dominant frequencies
fs = 60
t = temperature.index
x = temperature

# Perform Lomb-Scargle periodogram analysis
frequency, power = LombScargle(t, x).autopower()

# Create a DataFrame to store the periodogram analysis results
df = pd.DataFrame({'Frequency': frequency, 'Power': power})

# Sort the peaks based on power (amplitude)
df = df.sort_values(by='Power', ascending=False)

# Print the DataFrame
fig,ax = plt.subplots()
plt.plot(frequency,power)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7fbcca2e68e0>]

In [36]:
frequencies

array([  0.   ,   5.625,  11.25 ,  16.875,  22.5  ,  28.125,  33.75 ,
        39.375,  45.   ,  50.625,  56.25 ,  61.875,  67.5  ,  73.125,
        78.75 ,  84.375,  90.   ,  95.625, 101.25 , 106.875, 112.5  ,
       118.125, 123.75 , 129.375, 135.   , 140.625, 146.25 , 151.875,
       157.5  , 163.125, 168.75 , 174.375, 180.   , 185.625, 191.25 ,
       196.875, 202.5  , 208.125, 213.75 , 219.375, 225.   , 230.625,
       236.25 , 241.875, 247.5  , 253.125, 258.75 , 264.375, 270.   ,
       275.625, 281.25 , 286.875, 292.5  , 298.125, 303.75 , 309.375,
       315.   , 320.625, 326.25 , 331.875, 337.5  , 343.125, 348.75 ,
       354.375, 360.   , 365.625, 371.25 , 376.875, 382.5  , 388.125,
       393.75 , 399.375, 405.   , 410.625, 416.25 , 421.875, 427.5  ,
       433.125, 438.75 , 444.375, 450.   , 455.625, 461.25 , 466.875,
       472.5  , 478.125, 483.75 , 489.375, 495.   , 500.625, 506.25 ,
       511.875, 517.5  , 523.125, 528.75 , 534.375, 540.   , 545.625,
       551.25 , 556.

In [12]:

def parsedate(date):
    try:
        return datetime.strptime(x, '%Y-%m-%d %H:%M:%S').day
    except:
        return np.nan

#for i,row in df.iterrows():
#     print( int( row['Date'].split(' ')[0].split('-')[-1]) ) 
df['Day'] = df['Date'].apply(lambda x: int(x.split(' ')[0].split('-')[-1]), meta=('int') )
df['Day_rel'] = df['Day'] - df['Day'].unique().compute().min()
#df['Day_rel'] = df['Day']-df['Day'].loc[0]
df.head()



Unnamed: 0,index_old,ID,Date,timeStamp,hour,minute,RT,isDay,temp_avg,temp_med,temp_max,centroid_x,centroid_y,distance,Day
0,0,1.0,2023-05-19 12:00:00,2027.809737,12.0,720.0,20.48652,1.0,25.49587,25.49587,28.647199,76.937322,45.58859,,19
1,1,2.0,2023-05-19 12:00:00,2028.015251,12.0,720.0,20.493791,1.0,25.325776,25.325776,28.440092,78.679304,45.558714,1.742238,19
2,2,3.0,2023-05-19 12:00:00,2028.127524,12.0,720.0,20.500572,1.0,25.444307,25.444307,28.552307,78.295316,45.377368,0.424657,19
3,3,4.0,2023-05-19 12:00:00,2028.245897,12.0,720.0,20.496732,1.0,25.381336,25.381336,28.46603,77.857371,44.580024,0.9097,19
4,4,5.0,2023-05-19 12:00:00,2028.35584,12.0,720.0,20.49232,1.0,25.401985,25.401985,28.64029,78.615925,44.82835,0.798167,19


In [22]:
df['Day_rel'].unique().compute()

0    0
1    1
2    2
3    3
Name: Day_rel, dtype: int64

In [17]:

#df['mot_dist'] = np.sqrt((df['centroid_x'] - df['centroid_x'].shift(1))**2 + (df['centroid_y'] - df['centroid_y'].shift(1))**2)
#df['mot_dist'] = df['mot_dist'].fillna(0)



avg_min
#avg_min

Unnamed: 0,ID,timeStamp,hour,RT,isDay,temp_avg,temp_med,temp_max,centroid_x,centroid_y,distance
0,1.336434e+06,155847.136659,6.0,20.928277,0.000000,24.925562,24.925562,27.409255,45.632617,75.221100,0.663761
1,1.339281e+06,156172.399806,6.0,20.909420,0.597041,25.012292,25.012292,27.473522,44.624407,77.711148,0.630961
2,1.339671e+06,156218.470433,6.0,20.905223,1.000000,24.838781,24.838781,27.161659,48.362016,86.507362,0.468760
3,1.343349e+06,156640.463707,6.0,20.903320,1.000000,24.810668,24.810668,27.235981,51.050258,91.076157,0.222031
4,1.343131e+06,156616.988766,6.0,20.898115,1.000000,24.820788,24.820788,27.236991,48.647908,89.847007,0.322865
...,...,...,...,...,...,...,...,...,...,...,...
1436,1.345731e+06,156909.814315,6.0,20.934016,0.000000,24.817289,24.817289,27.277771,47.785162,85.979589,0.530997
1437,1.341345e+06,156406.007745,6.0,20.939502,0.000000,24.786358,24.786358,27.323865,44.175829,81.319092,0.785412
1438,1.336503e+06,155852.942435,6.0,20.933071,0.000000,24.851544,24.851544,27.305942,46.083188,80.210620,0.840904
1439,1.334818e+06,155660.904451,6.0,20.906522,0.000000,24.913386,24.913386,27.426630,43.668838,81.474248,0.728567


In [18]:

%matplotlib notebook

fig,ax = plt.subplots(3,1,sharex=True)

avg_min[['temp_avg','RT','distance']] = avg_min[['temp_avg','RT','distance']].rolling(5,center=True,min_periods=1).median()
ax[0].plot(avg_min.index,avg_min['temp_avg'])
ylim = ax[0].get_ylim()
ax[0].fill_between(avg_min.index,y1=ylim[0],y2=ylim[1], where=avg_min['isDay']<0.5,alpha=.2)

ax[1].plot(avg_min.index,avg_min['RT'])
ylim = ax[1].get_ylim()
ax[1].fill_between(avg_min.index,y1=ylim[0],y2=ylim[1], where=avg_min['isDay']<0.5,alpha=.2)

ax[2].plot(avg_min.index,avg_min['distance'])
ylim = ax[2].get_ylim()
ax[2].fill_between(avg_min.index,y1=ylim[0],y2=ylim[1], where=avg_min['isDay']<0.5,alpha=.4)


<IPython.core.display.Javascript object>

<matplotlib.collections.PolyCollection at 0x26782151880>

In [23]:


pth = Path(r'C:\Users\pupil\Downloads\thermal_behaviour_72_h')


def readTher(fnames):
    df = dd.read_csv((fnames/'data.csv').as_posix(), sep=';', skiprows=1, 
                     assume_missing=True)  
    df['distance'] = df.map_partitions(euclid) # da cambiare
    df = df.reset_index().rename(columns={'index':'index_old'})

    avg_min = df.groupby(['ID']).mean()
    avg_min = df.groupby(['minute']).mean()

    avg_min = avg_min.compute()
    avg_min = pd.concat([avg_min.loc[407:],avg_min.loc[:407]]).reset_index(drop=True)
    return avg_min

fnames = pth / '20230519_113859-ROI_*-WT_*'
wt = readTher(fnames)
fnames = pth / '20230519_113859-ROI_*-KO_*'
ko = readTher(fnames)


In [29]:
%matplotlib notebook

fig,ax = plt.subplots(3,1,sharex=True)

wt[['temp_avg','RT','distance']] = wt[['temp_avg','RT','distance']].rolling(5,center=True,min_periods=1).median()
ko[['temp_avg','RT','distance']] = ko[['temp_avg','RT','distance']].rolling(5,center=True,min_periods=1).median()

ax[0].plot(wt.index,wt['temp_avg'],'g')
ax[0].plot(ko.index,ko['temp_avg'],'r')
ylim = ax[0].get_ylim()
ax[0].fill_between(wt.index,y1=ylim[0],y2=ylim[1], where=wt['isDay']<0.5,alpha=.2)

ax[1].plot(wt.index,wt['RT'],'g')
ax[1].plot(ko.index,ko['RT'],'r')
ylim = ax[1].get_ylim()
ax[1].fill_between(wt.index,y1=ylim[0],y2=ylim[1], where=wt['isDay']<0.5,alpha=.2)

ax[2].plot(wt.index,wt['distance'],'g')
ax[2].plot(ko.index,ko['distance'],'r')
ylim = ax[2].get_ylim()
ax[2].fill_between(wt.index,y1=ylim[0],y2=ylim[1], where=wt['isDay']<0.5,alpha=.4)



<IPython.core.display.Javascript object>

<matplotlib.collections.PolyCollection at 0x2c9b6616070>