In [1]:
import os
import pandas as pd

# Set the directory containing the CSV files
directory = '.'

# Loop through the files in the directory and print the filename  if it ends with .csv
for filename in os.listdir(directory):
    if filename.endswith('.csv'):
        print(f'Found CSV: {filename}')

Found CSV: Male2_FastRefeed_Fos_output.csv
Found CSV: FemaleRR_Fed_Fos_output.csv
Found CSV: Female1_Restraint_Fos_output.csv
Found CSV: MaleL_FastRefeed_Fos_output.csv
Found CSV: FemaleB_Restraint_Fos_output.csv
Found CSV: MaleR_Fed_Fos_output.csv


In [2]:
filenames = [f for f in os.listdir('.') if f.endswith('.csv')]

# Loop through each file and perform checks
for filename in filenames:
    # Read in the CSV file
    df = pd.read_csv(filename)
    
    # Check number of rows and columns
    if df.shape != (1678, 8):
        print(f"Error: {filename} has {df.shape[0]} rows and {df.shape[1]} columns.")
    
    # Check for missing values
    if df.isnull().values.any():
        print(f"Error: {filename} contains missing values.")
        
    # Check data types
    if df.dtypes.any() != float:
        print(f"Error: {filename} contains non-numeric data.")
    
    # Look for anomalies or outliers in the data
    # This would depend on the specifics of your data and the scientific question you are trying to answer.

Error: Male2_FastRefeed_Fos_output.csv contains non-numeric data.
Error: FemaleRR_Fed_Fos_output.csv contains non-numeric data.
Error: Female1_Restraint_Fos_output.csv contains non-numeric data.
Error: MaleL_FastRefeed_Fos_output.csv contains non-numeric data.
Error: FemaleB_Restraint_Fos_output.csv contains non-numeric data.
Error: MaleR_Fed_Fos_output.csv contains non-numeric data.


In [3]:
# Create empty lists to store the dataframes
fed_dfs = []
fast_refeed_dfs = []
restraint_dfs = []

# Loop through each file in the current directory
for file in os.listdir():
    # Check if the file is a CSV and contains "Fos_output"
    if file.endswith(".csv") and "Fos_output" in file:
        # Read the CSV into a dataframe
        df = pd.read_csv(file)
        # Determine which group the file belongs to based on the file name
        if "Fed" in file:
            fed_dfs.append(df)
        elif "FastRefeed" in file:
            fast_refeed_dfs.append(df)
        elif "Restraint" in file:
            restraint_dfs.append(df)

# Combine the dataframes for each group into a single dataframe
fed_df = pd.concat(fed_dfs)
fast_refeed_df = pd.concat(fast_refeed_dfs)
restraint_df = pd.concat(restraint_dfs)


In [4]:
fed_df.describe()

Unnamed: 0,id,parent_structure_id,depth,count,volume (mm^3),density (cells/mm^3)
count,3356.0,3356.0,3356.0,3356.0,3356.0,3356.0
mean,5616.380215,5589.783075,6.373659,18817.92,2.611701,7601.208832
std,5015.855923,5016.992858,1.649227,143388.4,22.158519,7359.81689
min,-1.0,-2.0,-1.0,0.0,1.6e-05,0.0
25%,649.0,641.0,5.0,258.0,0.070547,2517.517202
50%,5651.5,1297.0,6.0,1233.5,0.234828,5365.064036
75%,10650.0,10642.0,8.0,4525.5,0.681484,10283.213187
max,11303.0,11301.0,9.0,3091451.0,699.556516,64000.0


In [7]:
fast_refeed_df.describe()

Unnamed: 0,id,parent_structure_id,depth,count,volume (mm^3),density (cells/mm^3)
count,3356.0,3356.0,3356.0,3356.0,3356.0,3356.0
mean,5616.380215,5589.783075,6.373659,31725.51,2.611701,13666.184148
std,5015.855923,5016.992858,1.649227,228095.6,22.158519,10782.788418
min,-1.0,-2.0,-1.0,0.0,1.6e-05,0.0
25%,649.0,641.0,5.0,566.0,0.070547,5731.217193
50%,5651.5,1297.0,6.0,2506.0,0.234828,10450.774332
75%,10650.0,10642.0,8.0,8360.5,0.681484,19343.12148
max,11303.0,11301.0,9.0,4088794.0,699.556516,84900.855242


In [8]:
restraint_df.describe()

Unnamed: 0,id,parent_structure_id,depth,count,volume (mm^3),density (cells/mm^3)
count,3356.0,3356.0,3356.0,3356.0,3356.0,3356.0
mean,5616.380215,5589.783075,6.373659,28195.99,2.611701,13372.81125
std,5015.855923,5016.992858,1.649227,204146.9,22.158519,10747.139804
min,-1.0,-2.0,-1.0,0.0,1.6e-05,0.0
25%,649.0,641.0,5.0,573.75,0.070547,5388.465109
50%,5651.5,1297.0,6.0,2522.0,0.234828,10393.987007
75%,10650.0,10642.0,8.0,7574.0,0.681484,18942.45742
max,11303.0,11301.0,9.0,3744577.0,699.556516,96431.144519


In [11]:
pd.options.display.max_rows = None
pd.options.display.float_format = '{:,.0f}'.format

restraint_df.describe()
fed_df.describe()



Unnamed: 0,id,parent_structure_id,depth,count,volume (mm^3),density (cells/mm^3)
count,3356,3356,3356,3356,3356,3356
mean,5616,5590,6,18818,3,7601
std,5016,5017,2,143388,22,7360
min,-1,-2,-1,0,0,0
25%,649,641,5,258,0,2518
50%,5652,1297,6,1234,0,5365
75%,10650,10642,8,4526,1,10283
max,11303,11301,9,3091451,700,64000


In [19]:
import pandas as pd

# Merge with the structure dataframe to add the name and acronym columns
# fed_df = pd.merge(fed_df, structure_df[['id', 'name', 'acronym']], on='id')
# fast_refeed_df = pd.merge(fastrefeed_df, structure_df[['id', 'name', 'acronym']], on='id')
# restraint_df = pd.merge(restraint_df, structure_df[['id', 'name', 'acronym']], on='id')

# Sort the dataframes by count and density values and select top 20 rows
fed_top_20 = fed_df.sort_values(by=['count', 'density (cells/mm^3)'], ascending=False).head(20)
fastrefeed_top_20 = fastrefeed_df.sort_values(by=['count', 'density (cells/mm^3)'], ascending=False).head(20)
restraint_top_20 = restraint_df.sort_values(by=['count', 'density (cells/mm^3)'], ascending=False).head(20)

# Format the dataframes to show integers with commas for thousands separators
fed_top_20 = fed_top_20.applymap(lambda x: "{:,}".format(int(x)))
fastrefeed_top_20 = fastrefeed_top_20.applymap(lambda x: "{:,}".format(int(x)))
restraint_top_20 = restraint_top_20.applymap(lambda x: "{:,}".format(int(x)))
`X`
# Display the top 20 areas for each group
print("Top 20 areas for Fed group:")
print(fed_top_20[['id', 'name', 'acronym', 'count', 'density (cells/mm^3)']])
print()

print("Top 20 areas for FastRefeed group:")
print(fastrefeed_top_20[['id', 'name', 'acronym', 'count', 'density (cells/mm^3)']])
print()

print("Top 20 areas for Restraint group:")
print(restraint_top_20[['id', 'name', 'acronym', 'count', 'density (cells/mm^3)']])


Top 20 areas for Fed group:


KeyError: "['id', 'name', 'acronym'] not in index"

In [14]:
import pandas as pd
import glob

# Get a list of all CSV files in the current directory
csv_files = glob.glob('*.csv')

# Create empty data frames for each group
fed_df = pd.DataFrame()
fastrefeed_df = pd.DataFrame()
restraint_df = pd.DataFrame()

# Iterate through the CSV files
for file in csv_files:
    # Read in the data
    df = pd.read_csv(file)
    
    # Select only the relevant columns
    df = df[['count', 'density (cells/mm^3)']]
    
    # Determine which group the data belongs to and concatenate it accordingly
    if 'Fed' in file:
        fed_df = pd.concat([fed_df, df], axis=0)
    elif 'FastRefeed' in file:
        fastrefeed_df = pd.concat([fastrefeed_df, df], axis=0)
    elif 'Restraint' in file:
        restraint_df = pd.concat([restraint_df, df], axis=0)
    else:
        print(f"Warning: {file} does not match any group.")

# Take the mean of each group
fed_mean = fed_df.mean()
fastrefeed_mean = fastrefeed_df.mean()
restraint_mean = restraint_df.mean()

# Print the means
print("Mean count and density for Fed group:\n", fed_mean)
print("Mean count and density for FastRefeed group:\n", fastrefeed_mean)
print("Mean count and density for Restraint group:\n", restraint_mean)


Mean count and density for Fed group:
 count                  18,818
density (cells/mm^3)    7,601
dtype: float64
Mean count and density for FastRefeed group:
 count                  31,726
density (cells/mm^3)   13,666
dtype: float64
Mean count and density for Restraint group:
 count                  28,196
density (cells/mm^3)   13,373
dtype: float64


In [15]:
import pandas as pd

# load all six CSV files into a list of DataFrames
dfs = []
for file in os.listdir():
    if file.endswith('.csv'):
        df = pd.read_csv(file)
        dfs.append(df)

# combine the six DataFrames into one using concat
combined_df = pd.concat(dfs)

# group the combined DataFrame by 'name' and 'acronym', and calculate the mean for 'count' and 'density'
averaged_df = combined_df.groupby(['name', 'acronym'])[['count', 'density (cells/mm^3)']].mean()


# format the DataFrame to show integers with commas for thousands separators
averaged_df = averaged_df.applymap(lambda x: "{:,}".format(int(x)))

# display the averaged DataFrame
pd.options.display.max_rows = None
print(averaged_df)


                                                                       count   
name                                               acronym                     
background                                         bkd               879,520  \
left Abducens nucleus                              VI-L                   84   
left Accessory facial motor nucleus                ACVII-L                 7   
left Accessory olfactory bulb                      AOB-L               8,108   
left Accessory olfactory bulb, glomerular layer    AOBgl-L               691   
left Accessory olfactory bulb, granular layer      AOBgr-L             5,005   
left Accessory olfactory bulb, mitral layer        AOBmi-L             2,411   
left Accessory supraoptic group                    ASO-L                  69   
left Accessory trigeminal nucleus                  Acs5-L                 14   
left Agranular insular area                        AI-L               65,910   
left Agranular insular area, dorsal part

In [37]:
import pandas as pd
import glob

# Create empty DataFrames for each group
fed_df = pd.DataFrame(columns=['count', 'density (cells/mm^3)'])
fastrefeed_df = pd.DataFrame(columns=['count', 'density (cells/mm^3)'])
restraint_df = pd.DataFrame(columns=['count', 'density (cells/mm^3)'])

# Loop through all CSV files in the current directory
for file in glob.glob("*.csv"):
    # Load the CSV into a DataFrame
    df = pd.read_csv(file)
    
    # Select only the relevant columns
    df = df[['count', 'density (cells/mm^3)']]
    
    # Determine which group the data belongs to and concatenate it accordingly
    if 'Fed' in file:
        fed_df = pd.concat([fed_df, df], axis=0)
    elif 'FastRefeed' in file:
        fastrefeed_df = pd.concat([fastrefeed_df, df], axis=0)
    elif 'Restraint' in file:
        restraint_df = pd.concat([restraint_df, df], axis=0)
    else:
        print(f"Warning: {file} does not match any group.")

# Take the mean of each group
fed_mean = fed_df.mean()
fastrefeed_mean = fastrefeed_df.mean()
restraint_mean = restraint_df.mean()

# Combine the mean values into a single DataFrame
grouped_means_df = pd.concat([fed_mean, fastrefeed_mean, restraint_mean], axis=1, keys=['Fed', 'FastRefeed', 'Restraint'])

# Format the DataFrame to show integers with commas for thousands separators
grouped_means_df = grouped_means_df.applymap(lambda x: "{:,}".format(int(x)))

# Display the DataFrame
print(grouped_means_df)


                         Fed FastRefeed Restraint
count                 18,817     31,725    28,195
density (cells/mm^3)   7,601     13,666    13,372


In [35]:
# Sort the dataframes by count and density values and select top 20 rows
fed_top_20 = fed_df.sort_values(by=['count', 'density (cells/mm^3)'], ascending=False).head(20)
fastrefeed_top_20 = fastrefeed_df.sort_values(by=['count', 'density (cells/mm^3)'], ascending=False).head(20)
restraint_top_20 = restraint_df.sort_values(by=['count', 'density (cells/mm^3)'], ascending=False).head(20)

# Format the dataframes to show integers with commas for thousands separators
fed_top_20 = fed_top_20.applymap(lambda x: "{:,}".format(int(x)))
fastrefeed_top_20 = fastrefeed_top_20.applymap(lambda x: "{:,}".format(int(x)))
restraint_top_20 = restraint_top_20.applymap(lambda x: "{:,}".format(int(x)))

# Display the top 20 areas for each group
print("Top 20 areas for Fed group:")
print(fed_top_20)
print()

print("Top 20 areas for FastRefeed group:")
print(fastrefeed_top_20)
print()

print("Top 20 areas for Restraint group:")
print(restraint_top_20)


Top 20 areas for Fed group:
         count density (cells/mm^3)
1    3,091,451               12,317
2    2,902,221               12,922
3    2,350,993               17,049
4    2,211,795               19,970
5    2,169,126               20,405
1    2,142,225                8,535
2    2,028,564                9,032
839  1,694,726                6,690
3    1,690,543               12,259
4    1,582,934               14,292
840  1,572,739                6,989
5    1,560,228               14,677
6    1,370,653               22,240
841  1,233,696                8,940
842  1,151,421               10,392
843  1,118,691               10,519
839  1,089,909                4,302
840  1,016,109                4,515
6      853,076               13,842
841    794,915                5,760

Top 20 areas for FastRefeed group:
         count density (cells/mm^3)
1    4,088,794               16,290
839  3,895,260               15,378
2    3,821,572               17,015
1    3,802,646               15,150
