# This notebook follows TY_3 for each dataset
# Quantification of field size and number of fields

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Folder holding the compiled .npy result files
directory = r"D:\2P_DATA\Alex_data\Compiled_data"

# Names of the .npy entries in that folder, in the order os.listdir returns them
file_list = [entry for entry in os.listdir(directory) if entry.endswith('.npy')]

# Load each file once and key the resulting array by its filename
data_dict = {}
for entry in file_list:
    data_dict[entry] = np.load(os.path.join(directory, entry))

# Echo every loaded array under its filename as a quick sanity check
for filename, data in data_dict.items():
    print(f"Filename: {filename}", data, sep="\n")


In [None]:
# Substrings used to pick files; a file matches when its name contains one
criteria_list = [
    "grid_count_familiar",
    "grid_count_novel",
    "grid_width_familiar",
    "grid_width_novel",
]

# criterion -> {filename: array} for every loaded file whose name contains it
filtered_data = {}
for criteria in criteria_list:
    matching = {}
    for filename, data in data_dict.items():
        if criteria in filename:
            matching[filename] = data
    filtered_data[criteria] = matching

# Show what was selected under each criterion
for criteria, filtered_dict in filtered_data.items():
    print(f"\nFiles containing '{criteria}':")
    for filename, data in filtered_dict.items():
        print(f"Filename: {filename}")
        print(data)

In [None]:
import itertools
def _flatten_criterion(arrays_by_file):
    """Return the arrays stored under one criterion as a single flat list.

    Each array is converted with ``tolist()``. List results are spliced in
    element by element; scalar results (from 0-d arrays) are kept as single
    entries. Files are visited in the dictionary's iteration order.
    """
    per_file = [data_array.tolist() for data_array in arrays_by_file.values()]
    return list(itertools.chain.from_iterable(
        item if isinstance(item, list) else [item] for item in per_file
    ))


# Pool the values of every file into one flat list per criterion
grid_count_familiar = _flatten_criterion(filtered_data["grid_count_familiar"])
grid_count_novel = _flatten_criterion(filtered_data["grid_count_novel"])
grid_width_familiar = _flatten_criterion(filtered_data["grid_width_familiar"])
grid_width_novel = _flatten_criterion(filtered_data["grid_width_novel"])

# Drop every entry whose novel count is exactly zero from all four lists.
# NOTE(review): this assumes the four lists are index-aligned (same files in
# the same order, same number of entries per file) - confirm.
zero_indices = [i for i, value in enumerate(grid_count_novel) if value == 0.0]
# Delete from the highest index down so the remaining indices stay valid
for index in reversed(zero_indices):
    for values in (grid_count_familiar, grid_count_novel,
                   grid_width_familiar, grid_width_novel):
        del values[index]

In [None]:
# Convert lists to numpy arrays
grid_count_familiar = np.array(grid_count_familiar)
grid_count_novel = np.array(grid_count_novel)
grid_width_familiar = np.array(grid_width_familiar)
grid_width_novel = np.array(grid_width_novel)

# Calculate the average for each criterion, ignoring NaN values.
# np.nanmean skips NaN entries; plain np.mean would return NaN as soon as a
# single entry is NaN. For NaN-free data both give the same result.
average_grid_count_familiar = np.nanmean(grid_count_familiar)
average_grid_count_novel = np.nanmean(grid_count_novel)
average_grid_width_familiar = np.nanmean(grid_width_familiar)
average_grid_width_novel = np.nanmean(grid_width_novel)

In [None]:
# Jittered scatter of the widths, familiar vs novel
plt.figure(figsize=(8, 6))

# Individual values: x positions are drawn around 0.98 (familiar) and
# 1.02 (novel) with a small random spread so overlapping points stay visible
for x_center, widths, colour, tag in (
    (0.98, grid_width_familiar, 'gray', 'familiar'),
    (1.02, grid_width_novel, 'red', 'novel'),
):
    jittered_x = np.random.normal(x_center, 0.005, len(widths))
    plt.scatter(jittered_x, widths, color=colour, label=tag, alpha=0.5)

# Group means as larger, black-edged markers at the un-jittered positions
for x_center, mean_width, colour, tag in (
    (0.98, average_grid_width_familiar, 'gray', 'Mean_familiar'),
    (1.02, average_grid_width_novel, 'red', 'Mean_novel'),
):
    plt.scatter([x_center], [mean_width], color=colour, label=tag,
                marker='o', edgecolors='black', s=100)

# Axis label, title and tick text
plt.ylabel('field width', size=21)
plt.title('Spatial field_width', fontsize=21)
plt.xticks([0.98, 1.02], ['Familiar', 'Novel'], fontsize=18)
plt.yticks(fontsize=18)

# Start the y-axis at 0
plt.ylim(bottom=0)

# Frameless legend anchored just outside the right edge of the axes
legend = plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=18, frameon=False)

# Hide the top and right spines, then draw all spines in thick black
ax = plt.gca()
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for spine in ax.spines.values():
    spine.set_color('black')
    spine.set_linewidth(2)

# Final layout and display
plt.grid(False)
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
from matplotlib.lines import Line2D

# Create the plot
plt.figure(figsize=(4, 4))

# Overlaid discrete histograms of the counts, each normalised to percent
sns.histplot(grid_count_familiar, color='grey', stat='percent', discrete=True, label='familiar')
sns.histplot(grid_count_novel, color='red', stat='percent', discrete=True, label='novel')

# Font and label for the y-axis
plt.yticks(fontsize=18)
plt.ylabel('#spatial fields (%)', fontsize=21)

# Fixed x ticks at 1, 2 and 3. The font size is passed in the same call that
# sets the tick positions and labels so it applies to the final labels.
custom_xtick_positions = [1, 2, 3]
custom_xtick_labels = ['1', '2', '3']
plt.suptitle("#spatial fields", y=1.0, fontsize='xx-large')
plt.xticks(custom_xtick_positions, custom_xtick_labels, fontsize=18)

# Hide the top and right spines, then draw all spines in thick black
ax = plt.gca()
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for spine in ax.spines.values():
    spine.set_color('black')
    spine.set_linewidth(2)

# Frameless legend anchored near the upper right of the axes
plt.legend(bbox_to_anchor=(0.8, 1), loc='upper left', fontsize=18, frameon=False)

plt.show()

## Statistics

In [None]:
from scipy.stats import wilcoxon

# Wilcoxon signed-rank test on the counts, familiar (x) against novel (y)
stat, p = wilcoxon(x=grid_count_familiar, y=grid_count_novel)

print("P-values:", p)

In [None]:
from scipy.stats import wilcoxon

# Wilcoxon signed-rank test on the widths, familiar (x) against novel (y)
stat, p = wilcoxon(x=grid_width_familiar, y=grid_width_novel)

print("P-values:", p)

### Density plots

In [None]:
# New figure for the density comparison of the counts
plt.figure(figsize=(10, 6))

# One kernel density curve per condition
for counts, colour, tag in (
    (grid_count_familiar, 'grey', 'Familiar'),
    (grid_count_novel, 'red', 'Novel'),
):
    sns.kdeplot(counts, color=colour, linewidth=2, linestyle='-', label=tag)

# Tick fonts, y-axis label and title
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.ylabel('Density', fontsize=21)
plt.title('Counting spatial fields', fontsize=21)

# Hide the top and right spines, then draw all spines in thick black
ax = plt.gca()
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for spine in ax.spines.values():
    spine.set_color('black')
    spine.set_linewidth(2)

# Frameless legend anchored near the upper right of the axes
plt.legend(bbox_to_anchor=(0.8, 1), loc='upper left', fontsize=18, frameon=False)

plt.show()

In [None]:
# New figure for the density comparison of the widths
plt.figure(figsize=(10, 6))

# One kernel density curve per condition
for widths, colour, tag in (
    (grid_width_familiar, 'gray', 'Familiar'),
    (grid_width_novel, 'red', 'Novel'),
):
    sns.kdeplot(widths, color=colour, linewidth=2, linestyle='-', label=tag)

# Tick fonts and y-axis label
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.ylabel('Density', fontsize=21)

# Hide the top and right spines, then draw all spines in thick black
ax = plt.gca()
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for spine in ax.spines.values():
    spine.set_color('black')
    spine.set_linewidth(2)

# Frameless legend anchored near the upper right of the axes
plt.legend(bbox_to_anchor=(0.8, 1), loc='upper left', fontsize=18, frameon=False)

plt.show()

## Field ratio (novel/familiar, N/F) vs. first trial of predicted licking

In [None]:
# Drop every entry whose novel count is exactly zero from all four sequences.
# np.delete is used instead of `del seq[index]` because the variables may
# already be numpy arrays at this point, and arrays do not support item
# deletion. It accepts lists as well and always returns a new array.
# The indices are computed once, before any of the sequences is replaced.
zero_indices = np.flatnonzero(np.asarray(grid_count_novel) == 0.0)
grid_count_familiar = np.delete(grid_count_familiar, zero_indices)
grid_count_novel = np.delete(grid_count_novel, zero_indices)
grid_width_familiar = np.delete(grid_width_familiar, zero_indices)
grid_width_novel = np.delete(grid_width_novel, zero_indices)

In [None]:
# Display the {filename: array} mapping selected for "grid_count_novel"
filtered_data["grid_count_novel"]

In [None]:
# One array per file for "grid_count_novel", kept separate instead of pooled
grid_count_novel = [
    np.array(file_values)
    for file_values in filtered_data["grid_count_novel"].values()
]
grid_count_novel

In [None]:
# One array per file for "grid_count_familiar", kept separate instead of pooled
grid_count_familiar = [
    np.array(file_values)
    for file_values in filtered_data["grid_count_familiar"].values()
]
grid_count_familiar

In [None]:
# One array per file for "grid_width_familiar", kept separate instead of pooled
grid_width_familiar = [
    np.array(file_values)
    for file_values in filtered_data["grid_width_familiar"].values()
]
grid_width_familiar

In [None]:
# One array per file for "grid_width_novel", kept separate instead of pooled
grid_width_novel = [
    np.array(file_values)
    for file_values in filtered_data["grid_width_novel"].values()
]
grid_width_novel

In [None]:
# Per dataset, keep only the positions where the novel count is non-zero and
# apply that same selection to the other three arrays of the dataset
filtered_grid_count_novel = []
filtered_grid_count_familiar = []
filtered_grid_width_novel = []
filtered_grid_width_familiar = []

per_dataset = zip(grid_count_novel, grid_count_familiar, grid_width_novel, grid_width_familiar)
for count_n, count_f, width_n, width_f in per_dataset:
    # True where this dataset's novel count differs from zero
    keep = count_n != 0

    filtered_grid_count_novel.append(count_n[keep])
    filtered_grid_count_familiar.append(count_f[keep])
    filtered_grid_width_novel.append(width_n[keep])
    filtered_grid_width_familiar.append(width_f[keep])

# Print each filtered collection under its own heading
report = (
    ("Filtered grid_count_novel:", filtered_grid_count_novel),
    ("\nFiltered grid_count_familiar:", filtered_grid_count_familiar),
    ("\nFiltered grid_width_novel:", filtered_grid_width_novel),
    ("\nFiltered grid_width_familiar:", filtered_grid_width_familiar),
)
for heading, arrays in report:
    print(heading)
    for arr in arrays:
        print(arr)

In [None]:
# Mean of the filtered novel counts, one value per dataset
average_count_each_data_novel = [
    np.mean(np.array(data_array)) for data_array in filtered_grid_count_novel
]
average_count_each_data_novel

In [None]:
# Mean of the filtered familiar counts, one value per dataset
average_count_each_data_familiar = [
    np.mean(np.array(data_array)) for data_array in filtered_grid_count_familiar
]
average_count_each_data_familiar

In [None]:
# Mean of the filtered novel widths, one value per dataset
average_width_each_data_novel = [
    np.mean(np.array(data_array)) for data_array in filtered_grid_width_novel
]
average_width_each_data_novel

In [None]:
# Mean of the filtered familiar widths, one value per dataset
average_width_each_data_familiar = [
    np.mean(np.array(data_array)) for data_array in filtered_grid_width_familiar
]
average_width_each_data_familiar

In [None]:
# Per-dataset ratio of mean width: novel divided by familiar (element-wise)
novel_means = np.array(average_width_each_data_novel)
familiar_means = np.array(average_width_each_data_familiar)
average_width_each_data_nf_ratio = novel_means / familiar_means
average_width_each_data_nf_ratio

In [None]:
# Per-dataset ratio of mean count: novel divided by familiar (element-wise)
novel_means = np.array(average_count_each_data_novel)
familiar_means = np.array(average_count_each_data_familiar)
average_count_each_data_nf_ratio = novel_means / familiar_means
average_count_each_data_nf_ratio

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress, spearmanr

# Hard-coded y value for each dataset (plotted as "First trial of predicted
# licking"), paired by position with the per-dataset width ratios.
# NOTE(review): the pairing relies on this list following the same dataset
# order as the loaded files - confirm.
y_values = np.array([18, 2, 1, 2, 2, 13, 2, 6, 4])
x_values = average_width_each_data_nf_ratio

# remove zero value data: drop the same dataset positions from both variables
# in a single call each, so the two index lists cannot drift apart
excluded = [0, 2, 7]
y_values = np.delete(y_values, excluded)
x_values = np.delete(x_values, excluded)

# Perform linear regression (Pearson correlation)
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)

# Calculate Spearman correlation
spearman_corr, spearman_p_value = spearmanr(x_values, y_values)

# Check the significance of the correlations
significance_level = 0.05
pearson_significance = "significant" if p_value < significance_level else "not significant"
spearman_significance = "significant" if spearman_p_value < significance_level else "not significant"

# Fitted values of the regression line at the observed x positions
regression_line = slope * x_values + intercept

# Plotting. Both artists carry a label so plt.legend() has entries to show.
plt.figure(figsize=(10, 6))
plt.scatter(x_values, y_values, color='blue', label='Data')
plt.plot(x_values, regression_line, color='red', label='Linear fit')
plt.xlabel('Width_N/F_ratio')
plt.ylabel('First trial of predicted licking')
plt.title('Linear Regression and Correlation Analysis')
plt.legend()
plt.grid(True)

# Add correlation and significance information to the plot
plt.annotate(f'Pearson correlation: R={r_value:.2f}, p={p_value:.3f} ({pearson_significance})\n'
             f'Spearman correlation: R={spearman_corr:.2f}, p={spearman_p_value:.3f} ({spearman_significance})',
             xy=(0.5, 0.95), xycoords='axes fraction', fontsize=12,
             horizontalalignment='center', verticalalignment='top',
             bbox=dict(boxstyle='round,pad=0.5', edgecolor='black', facecolor='white'))

# Show plot
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress, spearmanr

# Hard-coded y value for each dataset (plotted as "First trial of predicted
# licking"), paired by position with the per-dataset count ratios.
# NOTE(review): the pairing relies on this list following the same dataset
# order as the loaded files - confirm.
y_values = np.array([18, 2, 1, 2, 2, 13, 2, 6, 4])
x_values = average_count_each_data_nf_ratio

# remove zero value data: drop the same dataset positions from both variables
# in a single call each, so the two index lists cannot drift apart
excluded = [0, 2, 7]
y_values = np.delete(y_values, excluded)
x_values = np.delete(x_values, excluded)

# Perform linear regression (Pearson correlation)
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)

# Calculate Spearman correlation
spearman_corr, spearman_p_value = spearmanr(x_values, y_values)

# Check the significance of the correlations
significance_level = 0.05
pearson_significance = "significant" if p_value < significance_level else "not significant"
spearman_significance = "significant" if spearman_p_value < significance_level else "not significant"

# Fitted values of the regression line at the observed x positions
regression_line = slope * x_values + intercept

# Plotting. Both artists carry a label so plt.legend() has entries to show.
plt.figure(figsize=(10, 6))
plt.scatter(x_values, y_values, color='blue', label='Data')
plt.plot(x_values, regression_line, color='red', label='Linear fit')
plt.xlabel('#field_N/F_ratio')
plt.ylabel('First trial of predicted licking')
plt.title('Linear Regression and Correlation Analysis')
plt.legend()
plt.grid(True)

# Add correlation and significance information to the plot
plt.annotate(f'Pearson correlation: R={r_value:.2f}, p={p_value:.3f} ({pearson_significance})\n'
             f'Spearman correlation: R={spearman_corr:.2f}, p={spearman_p_value:.3f} ({spearman_significance})',
             xy=(0.5, 0.95), xycoords='axes fraction', fontsize=12,
             horizontalalignment='center', verticalalignment='top',
             bbox=dict(boxstyle='round,pad=0.5', edgecolor='black', facecolor='white'))

# Show plot
plt.show()
