In [None]:
# python packages
from sys import stdout
import pandas as pd # Dataframes and reading CSV files
import numpy as np # Numerical libraries
import matplotlib.pyplot as plt # Plotting library
from lmfit import Model # Least squares fitting library

To compute an average from multiple files, we can create a list with all the file names, write a simple loop over its elements, and compute each average using the NumPy function we learnt in the previous examples.

In [None]:
# CSV files to process, one dataset per file
listOfFiles = [
    "random1.csv",
    "random2.csv",
    "random3.csv",
    "random4.csv",
]

# Report the mean of column "Y" for each file in turn
for fileName in listOfFiles:
    yValues = pd.read_csv(fileName)["Y"]
    print("The average from file ",fileName," is ",np.mean(yValues))

Alternatively, we can create a new dataframe with all the averages, which will be useful for further processing. Let's imagine, for example, that each file corresponds to a dataset collected at a different temperature.

In [None]:
# One temperature (K) per entry of listOfFiles, in the same order
listOfTemperatures = [
    278,
    300,
    302,
    282,
]

We can create a dataframe with three columns

In [None]:
# Start from an empty table; one row per file is added to it later
results_df = pd.DataFrame(
    columns=["File", "Temperature", "Average"],
)
print(results_df)

and then use the _loc_ indexer to append data to the dataframe.

*len(results_df)* counts the number of rows that we have already added to the dataframe.

In [None]:
# Start from an empty table every time this cell runs, so that re-running it
# does not append the same files again or index past the end of
# listOfTemperatures.
results_df = pd.DataFrame(columns=["File", "Temperature", "Average"])

# Each file needs exactly one matching temperature; fail early with a clear
# message instead of silently dropping entries or raising an IndexError mid-loop.
if len(listOfFiles) != len(listOfTemperatures):
    raise ValueError("listOfFiles and listOfTemperatures must have the same length")

# Walk through files and temperatures together, so the pairing does not depend
# on how many rows the dataframe happens to contain.
for file, temperature in zip(listOfFiles, listOfTemperatures):
    data = pd.read_csv(file)
    average = np.mean(data["Y"])

    # len(results_df) is the number of rows added so far, which is also the
    # label of the next free row.
    ndata = len(results_df)
    results_df.loc[ndata] = [file, temperature, average]
print(results_df)

We can now plot the average versus temperature

In [None]:
# Draw the averages as large markers joined by a line, in dataframe row order
temperatures = results_df["Temperature"]
averages = results_df["Average"]

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(temperatures, averages, s=100)
ax.plot(temperatures, averages)
ax.set(xlabel="Temperature (K)", ylabel="Average")
plt.show()

To make the graph look better we can sort the dataframe based on the temperature

In [None]:
# Order the rows by increasing temperature; results_df itself is left unchanged
sorted_df = results_df.sort_values(by="Temperature")
print(sorted_df)

Then make a new graph

In [None]:
# Same figure as before, but drawn from the temperature-sorted dataframe
temperatures = sorted_df["Temperature"]
averages = sorted_df["Average"]

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(temperatures, averages, s=100)
ax.plot(temperatures, averages)
ax.set(xlabel="Temperature (K)", ylabel="Average")
plt.show()