## Generic Data Plotting Program

In [1]:
# Imports
import numpy as np
import math
import scipy.optimize as opt
import scipy.stats as sta
import matplotlib.pyplot as plt
import csv

In [2]:
def convertToFloats(items, skip=[]):
    '''Turns a list of strings into a list of floats.
    Args:
        items: list of strings to be converted
        skip: list of indices whose entries are copied through unchanged,
            defaults to empty list (never modified here)
    Returns:
        list with every non-skipped element converted to float, or
        0 as soon as a non-skipped element cannot be parsed as a float
    '''
    result = []
    for idx, item in enumerate(items):
        # Skipped positions keep their original (string) value.
        if idx in skip:
            result.append(item)
            continue
        try:
            number = float(item)
        except ValueError:
            # One bad field invalidates the whole row.
            return 0
        result.append(number)
    return result

In [6]:
def getData(filename, nCol, skip=[]):
    '''Reads a CSV file, imports data into lists, converts strings to floats.

    Rows are ignored when a non-skipped field cannot be converted to a
    float (e.g. a header line) or when the row has fewer than nCol fields
    (e.g. a blank line), so such rows no longer raise IndexError.
    Args:
        filename: string representing the CSV file location
        nCol: number of columns in the data
        skip: list of column numbers that don't have numerical values
            (only read, never modified)
    Returns:
        list of nCol lists, each of which contains the data for that column
    '''
    data = [[] for _ in range(nCol)]

    # newline='' lets the csv module do its own newline handling, as the
    # csv documentation recommends for files passed to csv.reader.
    with open(filename, newline='') as csvFile:
        for row in csv.reader(csvFile, delimiter=','):
            conv = convertToFloats(row, skip)
            # convertToFloats returns 0 for an unconvertible row; a short
            # row (including the empty list from a blank line) is also
            # unusable because it cannot fill every column.
            if conv == 0 or len(conv) < nCol:
                continue
            # zip stops after nCol entries, so extra trailing fields are
            # ignored exactly as before.
            for column, value in zip(data, conv):
                column.append(value)
    return data

In [13]:
def calcAvg(values):
    '''Computes the arithmetic mean of a list of values.
    Args:
        values: list of floats
    Returns:
        float equal to the sum of the values divided by their count
    '''
    return sum(values) / len(values)

def calcVar(values):
    '''Computes the variance of a list of values.

    The sum of squared deviations from the average is divided by
    (number of values - 1).
    Args:
        values: list of floats
    Returns:
        float representing variance of list of values
    '''
    mean = calcAvg(values)
    nMinusOne = len(values) - 1
    sumSquares = 0
    for item in values:
        deviation = item - mean
        sumSquares += deviation ** 2
    return sumSquares / nMinusOne

def calcStdev(values):
    '''Computes the standard deviation of a list of values.
    Args:
        values: list of floats
    Returns:
        square root of the variance returned by calcVar
    '''
    variance = calcVar(values)
    return np.sqrt(variance)

In [None]:
def plot_distribution(data, nBins, xLabel, units, errBar=True):
    '''Plots a histogram of the data with a Gaussian overlaid.

    The Gaussian uses the average and standard deviation of the data and
    is scaled by (number of points * bin width) so that it is comparable
    to the bin counts. The plot is drawn on the current matplotlib figure;
    nothing is returned.
    Args:
        data: the data to plot
        nBins: the number of bins for the histogram
        xLabel: label for the x axis, also used in the plot title
        units: units of data, appended to the x axis label in parentheses
            (pass an empty string or None to leave them out)
        errBar: if True, overlay error bars of half a bin width in x and
            the square root of the bin count in y; defaults to True
    '''

    # Find plot range, bin width, etc.
    # The range is widened by (data range / nBins) on each side.
    lowest = min(data)
    highest = max(data)
    pltRange = highest - lowest
    lowerEdge = lowest - (pltRange / nBins)
    upperEdge = highest + (pltRange / nBins)
    binWidth = (upperEdge - lowerEdge) / nBins

    # Make the Gaussian
    avg = calcAvg(data)
    stdev = calcStdev(data)
    gauX = np.linspace(lowerEdge, upperEdge, 1000)
    gauY = len(data) * binWidth * sta.norm.pdf(gauX, avg, stdev)

    # Define labels
    yLabel = "Frequency"
    xAxisLabel = "{} ({})".format(xLabel, units) if units else xLabel
    # Raw string: \m and \s are mathtext commands, not Python escapes.
    gauLabel = r"Gaussian($\mu={:.1f},\sigma={:.1f}$)".format(avg, stdev)

    # Use my custom style
    plt.style.use('reflynn.mplstyle')

    # Plot the histogram over the padded range
    counts, binEdges, _ = plt.hist(data, nBins, color='b',
                                   label='Measurements', alpha=0.5,
                                   range=(lowerEdge, upperEdge))

    # Plot error bars at the bin centers
    if errBar:
        binCenters = 0.5 * (binEdges[1:] + binEdges[:-1])
        plt.errorbar(binCenters, counts, xerr=binWidth / 2.0,
                     yerr=np.sqrt(counts), label='Error', fmt='o')

    # Plot the Gaussian
    plt.plot(gauX, gauY, color='r', label=gauLabel)

    # Title, labels and axis ranges
    plt.ylabel(yLabel, position=(0.1, 0.84))
    plt.xlabel(xAxisLabel, position=(0.92, 0.1))
    plt.xlim(lowerEdge, upperEdge)
    # Leave headroom above the tallest bin so the legend has room.
    plt.ylim(0, 1.35 * counts.max())
    plt.title("Distribution of {}".format(xLabel))
    plt.grid(True, alpha=0.25)
    plt.legend(loc='best')