In [71]:
%reset -f


#Package Imports
import pandas as pd
import pyarrow
import glob
import os
import matplotlib.pyplot as plt
import matplotlib.colors as cl
import matplotlib
import numpy as np
#import IProgress
from sklearn.linear_model import LinearRegression
#import sklearn
#from tqdm import tqdm
#from tqdm import *
from tqdm.notebook import tqdm
from pathlib import Path
import shelve
import string
import lmfit

#pyfai --> Function to use if I want to write new functions to read in the 2D images and create
# the 1D profiles


# Function Definitions

## Helper Functions

In [72]:
def slope(x1,y1,x2,y2):
    m = (y2-y1)/(x2-x1)
    return m

def intercept(x1,y1,x2,y2):
    m = slope(x1,y1,x2,y2)
    b = y2-(m*x2)
    return b

def interpol_lin(x1,y1,x2,y2,x):
    m = slope(x1,y1,x2,y2)
    y = m*(x-x1) + y1
    return y

#Returns the Index of the closest value in a column
def closest(df,colx,value,offset=0):
    #NOTE colx is to be a string identifying the 
    #dataframe columns with the respective data
    closest_index = 0
    minimum = df[colx].max() - df[colx].min()
    for index in range(len(df)):
        difference = abs(df[colx][index]-value)
        #print("Current Index: " + str(index) + ";  Current Dif: " + str(difference)+";  Minimum: " + str(minimum))
        if difference<minimum:
            closest_index = index
            minimum = difference
    #NOTE This function will choose the lower index value in the event
    #there are multiple equally close points--> Adjusted now 
    return closest_index+offset

#returns the Index immediately before the closest value in a column
#Function can be done more simply with a basic > or < check if the list is assumed to be ordered
def closest_ordered(df,colx,value,offset=0):
    #NOTE colx is to be a string identifying the 
    #dataframe columns with the respective data
    closest_index = 0
    minimum = df[colx].max() - df[colx].min()
    for index in range(len(df)):
        difference = abs(df.iloc[index][colx]-value)
        #print("Current Index: " + str(index) + ";  Current Dif: " + str(difference)+";  Minimum: " + str(minimum))
        if difference<minimum:
            closest_index = index
            minimum = difference
    #NOTE This function will choose the lower index value in the event
    #there are multiple equally close points--> Adjusted now 
    if df[colx][closest_index] > value:
        closest_index -= 1   
    return closest_index+offset

def correction_slopematch(rawx,rawy,dilation,slide=0,direction = 1):
    return (rawy*(dilation**(direction))) - slide
    #return (rawy*(dilation**(direction)))
#-1 refers to a scale DOWN
#IMPORTANT: When using with df.apply() the FIRST variable is the only one that
#can be iterated over

def correction_slopematch_log(rawx,rawy,dilation,slide=0,direction = 1):
    return (rawy**(dilation*direction))*(10**(slide*direction))
    #return (rawy**(dilation*direction))
#-1 refers to a scale DOWN
#IMPORTANT: When using with df.apply() the FIRST variable is the only one that
#can be iterated over

def correction_valuematch(rawx,rawy,slope,intercept=0,direction = 1):
    return rawy*(slope**(direction)) 
#IMPORTANT: When using with df.apply() the FIRST variable is the only one that
#can be iterated over

def fraction_symm(x,lowerbound,upperbound):
    #Weight Starts at 0 the closer it is to the lowerbound
    if lowerbound == upperbound:
        return 0.0 
    elif x < lowerbound:
        return 0.0
    elif x > upperbound:
        return 1.0
    else: 
        weight = ((x-lowerbound)/(upperbound-lowerbound))
        return weight
    
def intersection(lst1, lst2):
    lst3 = [value for value in lst1 if value in lst2]
    return lst3

#Function to add a value to a dictionary --> To be Defunct soon
#def sample_metadata(category,entry,name):
#    if not (category in fields):
#        raise Exception("The category you've chosen is not in the established set of fields")
#    name = {}
#    return new

def closest_list_value(list,value):
    arr = np.asarray(list)
    i = (np.abs(arr - value)).argmin()
    return arr[i]

def closest_list_index(list,value):
    arr = np.asarray(list)
    i = (np.abs(arr - value)).argmin()
    return i

def check_positive(df):
    df=df[df.select_dtypes(include=[np.number]).ge(0).all(1)]
    return df


## Utility Functions: Autosave; Backups etc

### Save and Autosave Functions

### add_dat_extension & process_directory: Directory extension and name modifiers
Adds dat extension to naked files without an extension

In [73]:
def add_dat_extension(filename):
    #print("Function 1 Triggered")
    name, ext = os.path.splitext(filename)
    if not ext:
        new_filename = f"{name}.dat"
        return new_filename
    return filename

def process_directory(root):
    #print("Function 2 Triggered")
    for dirpath, dirnames, filenames in os.walk(root):
        #print(dirpath)
        #print(dirnames)
        #print(filenames)
        #print("Next Sample")
        for filename in filenames:
            if filename != ".DS_Store":
                old_filepath = os.path.join(dirpath, filename)
                new_filename = add_dat_extension(filename)
                new_filepath = os.path.join(dirpath, new_filename)

                if filename != new_filename:
                    os.rename(old_filepath, new_filepath)
                    print(f"Renamed: {old_filepath} -> {new_filepath}")

## Data Ingestion: Data import, export, and Metadata 

### Import Data Files 

In [74]:
def import_data_dat(name, folder, prefix, suffix,independent="q",dependent="Intensity"):    
    #Depending on the output panel, sample .dat files may have a different prefix than the sample name
    
    if not (folder[-1] == "/"):
        raise Exception("Remember to include a trailing slash on the folder directory")
    length_suffix = len(suffix)
    length_prefix = len(prefix)
    length_name = len(name) #Not currently used value --> Meaning I can put whatever information I want here
    
    #Initial Copy of original Code pt2
    # assign path
    path, dirs, files = next(os.walk(folder))
    files.sort()
    #print("Data Import:")
    #print(files)
    #print()
    file_count = len(files)
    # create empty list
    f_dataframes = []
    #print("There are "+str(file_count)+" files in this directory")
  
    # append datasets to the list 
    for i in range(file_count):
        if files[i][(-1*length_suffix):] == suffix:
            #print(folder+files[i])
            temp_df = pd.read_csv(folder+files[i], comment='#',sep='\t',usecols = [0,1], names = [independent,dependent])
            f_dataframes.append(temp_df)
        else: 
            continue
    if len(f_dataframes) == 0:
        raise Exception("Looks like your suffix was not found in the folder; please check and try again")
    
    return f_dataframes

### Create Energies List

In [75]:
def import_energieslist(name, folder, prefix, suffix):
    #Depending on the output panel, sample .dat files may have a different prefix than the sample name
    length_suffix = len(suffix)
    length_prefix = len(prefix)
    length_name = len(name)
    
    path, dirs, files = next(os.walk(folder))
    files.sort()
    #print('Energies Import:')
    #print(files)
    #print()
    #print(os.listdir(folder))
    file_count = len(files)
    f_energies_list_str = []
    sorteddirs = sorted(os.listdir(folder))
    for i in range(file_count):
        if files[i][(-1*length_suffix):] == suffix:
            #tempname = os.listdir(folder)[i][(length_prefix):(-1*(length_suffix))]
            tempname = sorteddirs[i][(length_prefix):(-1*(length_suffix))]
            #print(tempname)
            underscorefree = ""
            index = 0
            while index < len(tempname):
                if tempname[index:(index+2)] == "_0": 
                    index += 2
                    continue
                elif tempname[index] == "_":
                    index += 1
                    continue 
                else: 
                    underscorefree = underscorefree + tempname[index]
                    index += 1
            f_energies_list_str.append(underscorefree)
        else:
            continue       
        
    if len(f_energies_list_str) == 0:
        raise Exception("Looks like your suffix was not found in the folder; please check and try again")

    f_energies_list = [float(value) for value in f_energies_list_str]
    #print(energies_list)
    #Creates Key Lists:
    #energies_list --> A list of all the energies taken for a given sample, read in from file name
    #dataframes_list --> The list of all the dataframes, each dataframe constitutes one energy. 
    return f_energies_list

### Import: all folders for a given SET of samples (Scattering & OPV)

In [76]:
def import_set(folder, pre, suf,ind="q",dep="Intensity"):
    #REQUIRES CONSISTENT PREFIX AND SUFFIX
    names = []
    names_e = []
    
    #Error Checking
    if not (folder[-1] == "/"):
        raise Exception("Remember to include a trailing slash on the folder directory")
 
    
    #Loop to check root folders for Sample dirs and for files
    #for index in range(len(files)):
    #    import_data_dat(files[index],folder+files[index],prefix,suffix,ind,dep)
    #    import_energies(files[index],folder+files[index],prefix,suffix)
    
    path1, dirs1, files1 = next(os.walk(folder))

    #cycle into the directories within folder
    for index1 in range(len(dirs1)):
        path2,dirs2,files2 = next(os.walk(folder+"/"+dirs1[index1]))
        #Check the CCD0/CCD100 folders --> Final import 
        for index2 in range(len(dirs2)):
            temp_name = dirs1[index1]+"_"+dirs2[index2]
            temp_dir = path1 + dirs1[index1]+"/" +dirs2[index2]+"/"
            temp_name_e = temp_name + "_energies"
            t_glob = globals()
            t_glob.__setitem__(temp_name,import_data_dat(temp_name,temp_dir,pre,suf,ind,dep))
            t_glob.__setitem__(temp_name_e,import_energieslist(temp_name_e,temp_dir,pre,suf))
            names.append(temp_name)
            names_e.append(temp_name_e)
    print(names)
    #print(names_e)
    return (names,names_e)

#import_data_dat(name, folder, prefix, suffix,independent="q",dependent="Intensity")
#import_energieslist(name, folder, prefix, suffix):
#re.sub(r'[^\w]', ' ', <string>)

### Export one list of dataframes and an energies list

In [77]:
##accepts data and energies as LISTS 
def export_csv(name,data,energies,path=""):
    if not len(data) == len(energies):
        raise Exception("Data and Metadata lists are not the same length")
    path="Data_Export/"+path+name+"/"
    filepath = Path(path)
    filepath.mkdir(parents=True, exist_ok=True)
    
    for index in range(len(data)):
        tempname = name + "_" + str(energies[index]) + ".csv"
        #print(path)
        #filepath = Path(path)
        #data[index].to_csv(filepath/name/str(energies[index]))
        data[index].to_csv(filepath/tempname, index = False)
    return

### Build blank sample dictionary: metadata_blanksample
Saves the name and whatever added data to a sample dictionary

In [78]:
def metadata_blanksample(name,date="",description = "",trial="",viable="",material="",concentration="", spinspeed="",solvent="",anneal_temp="",anneal_time="",additive_material="",additive_amount="",ccd0_data="",ccd0_energies="",ccd100_data="",ccd100_energies="",stitched_data="",stitched_energies=""):
        #Error type checking
        if not type(name)== "string":
                raise Exception("sample name must be a string")

    
        f_blanksample =                 \
                {"name":name,             \
                "descr":description,     \
                "date":date,              \
                "trial":trial,             \
                "viable":viable,            \
                "material":material,          \
                "concentration":concentration,      \
                "spinspeed":spinspeed,          \
                "solvent":solvent,            \
                "anneal":                       \
                        {"temperature":anneal_temp,      \
                        "time":anneal_time},              \
                "additive":                     \
                        {"material":additive_material,         \
                        "amount%":additive_amount},           \
                "ccd0":                         \
                        {"data":ccd0_data,             \
                        "energies":ccd0_energies},          \
                "ccd100":                       \
                        {"data":ccd100_data,             \
                        "energies":ccd100_energies},          \
                "stitched":                     \
                        {"data":stitched_data,             \
                        "energies":stitched_energies},          \
                
                }

        #Establish Variable name:
        t_glob = globals()
        t_glob.__setitem__(name,f_blanksample)

        return



## Data Visualization Functions

### Plot All: plot_allenergies

In [79]:
def plot_allenergies(f_name,f_data,f_energy,xscale = "log",yscale="log",distr = "index",path=""):
    name = f_name + ": All Energies"
    path = "Figures/" + path

    #Error Checking
    if not (path[-1] == "/"):
        raise Exception("Remember to include a trailing slash on the folder directory")
    filepath = Path(path)
    filepath.mkdir(parents=True, exist_ok=True)
    
    #Make First plot, standard intensity plot
    fig1 = plt.figure(figsize=(20, 10),dpi = 200)
    ax1 = fig1.add_subplot(111)
    ax1.set_ylabel('Intensity', fontsize = 20)
    ax1.set_xlabel('q ($A^{-1}$)', fontsize = 20)
    plt.xticks(fontsize= 18)
    plt.yticks(fontsize= 18)
    fig1.patch.set_facecolor('white')
    plt.title(name, fontsize = 25, pad=15)
    plt.ylim(bottom=10e-15,top=10e-7) #BANDAID SOLUTION for Value elimination

    #DEFAULT: use linear scaling --> Use Log Scaling actually
    ax1.set_xscale(xscale)
    ax1.set_yscale(yscale)
    
    #Choose color distribution scheme --> DEFAULT: Scale by Index for better contrast
    if distr== "energy":
        norm = cl.Normalize(vmin=min(f_energy), vmax=max(f_energy))
        cmap = matplotlib.cm.get_cmap('rainbow')
        colors = [cmap(norm(float(f_energy[i]))) for i in range(len(f_energy))]
    else:
        cmap = matplotlib.cm.get_cmap('rainbow')
        norm = cl.Normalize(vmin=0, vmax=len(f_energy))
        colors = [cmap(norm(i)) for i in range(len(f_energy))]

    for index in range(len(f_data)):
        ax1.plot(f_data[index]['q'], f_data[index]['Intensity'],color=colors[index], label=f_energy[index])

    leg1 = ax1.legend(ncol=4,loc='upper right')
    
    #Save the plot as png
    plt.savefig(path+f_name + "_allenergies_I-loglog.png")
    plt.close()
    
    #Make second I*q^2 plot 
    fig2 = plt.figure(figsize=(20, 10),dpi = 200)
    ax2 = fig2.add_subplot(111)
    ax2.set_ylabel('$I*q^2$', fontsize = 20)
    ax2.set_xlabel('q ($A^{-1}$)', fontsize = 20)
    ax2.set_xscale('log')

    #ax1.set_ylim([10e4, 10e9])
    plt.xticks(fontsize= 18)
    plt.yticks(fontsize= 18)
    fig2.patch.set_facecolor('white')
    plt.title(name, fontsize = 25, pad=15)

    #xscale is dependent on the python xscale API
    #DEFAULT: use "linear" for scale
    ax2.set_xscale(xscale)

    #Choose color distribution scheme --> DEFAULT: Scale by Index for better contrast
    if distr== "energy":
        norm2 = cl.Normalize(vmin=min(f_energy), vmax=max(f_energy))
        cmap2 = matplotlib.cm.get_cmap('rainbow')
        colors2 = [cmap(norm(float(f_energy[i]))) for i in range(len(f_energy))]
    else:
        cmap2 = matplotlib.cm.get_cmap('rainbow')
        norm2 = cl.Normalize(vmin=0, vmax=len(f_energy))
        colors2 = [cmap(norm(i)) for i in range(len(f_energy))]

    for index in range(len(f_data)):
        #ax1.plot(dataframes_list[index])
        ax2.plot(f_data[index]['q'], (f_data[index]['Intensity']*(f_data[index]['q']**2)),color=colors2[index], label=f_energy[index])

    leg2 = ax2.legend(ncol=4,loc='upper right')
    plt.savefig(path+f_name + "_allenergies_Iq2-loglin.png")
    plt.close()

### Plot Select Energies: plot_selectenergy

In [80]:
#Function to plot 
def plot_selectenergy(f_name,f_data,f_energy,energies,xscale = "log",yscale="log",distr = "index",path=""):
    path = "Figures/" + path

    #Error Checking
    if not (path[-1] == "/"):
        raise Exception("Remember to include a trailing slash on the folder directory")
    filepath = Path(path)
    filepath.mkdir(parents=True, exist_ok=True)
    
    #FIRST PLOT
    #Extract the indexes of the correct dataframes and add to list
    indexes = []
    for value in energies:
        #indexes.append(f_energy.index(value))
        indexes.append(closest_list_index(f_energy,value))
     
    name = f_name + ": Select Energies"

    #Create Plot Frame
    figA = plt.figure(figsize=(20, 10),dpi = 200)

    axA = figA.add_subplot(111)
    #plt.ylabel('Intensity')
    #plt.xlabel('q')
    #figA.supxlabel('q', fontsize = 20)
    #figA.supylabel('Intensity', fontsize = 20)
    axA.set_ylabel('Intensity', fontsize = 20)
    axA.set_xlabel('q ($A^{-1}$)', fontsize = 20)
    #ax2.set_ylim([10e4, 10e-25])
    plt.xticks(fontsize= 18)
    plt.yticks(fontsize= 18)
    figA.patch.set_facecolor('white')
    plt.title(name, fontsize = 25, pad=15)
    plt.ylim(bottom=10e-15,top=10e-7) #BANDAID SOLUTION for Value elimination

    #plt.ginput(2)
    
    #xscale is dependent on the python xscale API
    #DEFAULT: use "linear" for scale
    axA.set_xscale(xscale)
    axA.set_yscale(yscale)

    #Choose color distribution scheme --> DEFAULT: Scale by Index for better contrast
    if distr== "energy":
        norm = cl.Normalize(vmin=min(f_energy), vmax=max(f_energy))
        cmap = matplotlib.cm.get_cmap('rainbow')
        colors = [cmap(norm(float(f_energy[i]))) for i in range(len(f_energy))]
    else:
        cmap = matplotlib.cm.get_cmap('rainbow')
        norm = cl.Normalize(vmin=0, vmax=len(f_energy))
        colors = [cmap(norm(i)) for i in range(len(f_energy))]


    for index in indexes:
        axA.plot(f_data[index]['q'], f_data[index]['Intensity'],color=colors[index], label=f_energy[index])

    legA = axA.legend(ncol=4,loc='upper right',frameon = False, fontsize = 15)
    plt.savefig(path+f_name + "_selectenergies_I-loglog.png")
    plt.close()
    
    #NOW THE Iq^2
    #Create Plot Frame
    figB = plt.figure(figsize=(20, 10),dpi = 200)

    axB = figB.add_subplot(111)
    #plt.ylabel('Intensity')
    #plt.xlabel('q')
    #figB.supxlabel('q', fontsize = 20)
    #figB.supylabel('Intensity', fontsize = 20)
    axB.set_ylabel('$I*q^2$', fontsize = 20)
    axB.set_xlabel('q ($A^{-1}$)', fontsize = 20)
    axB.set_yscale('log')
    #ax2.set_ylim([10e4, 10e9])
    plt.xticks(fontsize= 18)
    plt.yticks(fontsize= 18)
    figB.patch.set_facecolor('white')
    plt.title(name, fontsize = 25, pad=15)
    #plt.ginput(2)
    
    
    #xscale is dependent on the python xscale API
    #DEFAULT: use "linear" for scale
    axB.set_xscale(xscale)

    #Choose color distribution scheme --> DEFAULT: Scale by Index for better contrast
    if distr== "energy":
        norm2 = cl.Normalize(vmin=min(f_energy), vmax=max(f_energy))
        cmap2 = matplotlib.cm.get_cmap('rainbow')
        colors2 = [cmap(norm(float(f_energy[i]))) for i in range(len(f_energy))]
    else:
        cmap2 = matplotlib.cm.get_cmap('rainbow')
        norm2 = cl.Normalize(vmin=0, vmax=len(f_energy))
        colors2 = [cmap(norm(i)) for i in range(len(f_energy))]

    for index in indexes:
        #axB.plot(dataframes_list[index])
        axB.plot(f_data[index]['q'], (f_data[index]['Intensity']*(f_data[index]['q']**2)),color=colors2[index], label=f_energy[index])

    legB = axB.legend(ncol=4,loc='upper right',frameon = False, fontsize = 15)
    plt.savefig(path+f_name + "_selectenergies_Iq2-loglin.png")
    plt.close()
    
    return


### Plot SINGLE Energy: plot_singleenergy

In [81]:
#Function to plot 
def plot_singleenergy(f_name,f_data,f_energy,energy,xscale = "log",yscale="log",distr = "index",path="",line_color='blue'):
    path = "Figures/" + path

    #Error Checking
    if not (path[-1] == "/"):
        raise Exception("Remember to include a trailing slash on the folder directory")
    filepath = Path(path)
    filepath.mkdir(parents=True, exist_ok=True)
    
    #FIRST PLOT
    #Extract the indexes of the correct dataframes and add to list
    index = closest_list_index(f_energy,energy)
    energy = f_energy[index]
     
    name = f_name + ": " + str(energy)+" eV"

    #Create Plot Frame
    figA = plt.figure(figsize=(20, 10),dpi = 200)

    axA = figA.add_subplot(111)
    #plt.ylabel('Intensity')
    #plt.xlabel('q')
    #figA.supxlabel('q', fontsize = 20)
    #figA.supylabel('Intensity', fontsize = 20)
    axA.set_ylabel('Intensity', fontsize = 20)
    axA.set_xlabel('q ($A^{-1}$)', fontsize = 20)
    #ax2.set_ylim([10e4, 10e9])
    plt.xticks(fontsize= 18)
    plt.yticks(fontsize= 18)
    figA.patch.set_facecolor('white')
    plt.title(name, fontsize = 25, pad=15)
    plt.ylim(bottom=10e-15,top=10e-7) #BANDAID SOLUTION for Value elimination
    #plt.ginput(2)
    
    #xscale is dependent on the python xscale API
    #DEFAULT: use "linear" for scale
    axA.set_xscale(xscale)
    axA.set_yscale(yscale)

    #Detached plot command from multiloop function
    axA.plot(f_data[index]['q'], f_data[index]['Intensity'], label=f_energy[index],color = line_color)

    #legA = axA.legend(ncol=4,loc='upper right',frameon = False, fontsize = 15)
    plt.savefig(path+f_name + "_singleenergy-"+str(energy)+"_I-loglog.png")
    plt.close()
    
    
    #NOW THE Iq^2
    #Create Plot Frame
    figB = plt.figure(figsize=(20, 10),dpi = 200)

    axB = figB.add_subplot(111)
    #plt.ylabel('Intensity')
    #plt.xlabel('q')
    #figB.supxlabel('q', fontsize = 20)
    #figB.supylabel('Intensity', fontsize = 20)
    axB.set_ylabel('$I*q^2$', fontsize = 20)
    axB.set_xlabel('q ($A^{-1}$)', fontsize = 20)
    axB.set_yscale('log')
    #ax2.set_ylim([10e4, 10e9])
    plt.xticks(fontsize= 18)
    plt.yticks(fontsize= 18)
    figB.patch.set_facecolor('white')
    plt.title(name, fontsize = 25, pad=15)
    #plt.ginput(2)
    
    
    #xscale is dependent on the python xscale API
    #DEFAULT: use "linear" for scale
    axB.set_xscale(xscale)

    axB.plot(f_data[index]['q'], (f_data[index]['Intensity']*(f_data[index]['q']**2)),color=line_color, label=f_energy[index])

    legB = axB.legend(ncol=4,loc='upper right',frameon = False, fontsize = 15)
    plt.savefig(path+f_name + "_singleenergy-"+str(energy)+"_Iq2-loglog.png")
    plt.close()
    
    return

## Processing & Analysis Functions: Manipulate Data in Some Fashion

### Stitch WAXS-SAXS: analysis_stitch

In [82]:
# Function for Stitching of CCD0 and CCD100 --> Intake; CCD0 data frame, CCD100 data frame, Full vs Windowed, 
# Upper Bound and Lower Bound
# SMOOTHING accounts for the number of additional points added to the linear fit at the stich point
# WINDOW considers how much of the data to use in the weighted averaging
# SMOOTHING - The number of points included in the linear regression
# REGIME -  How the Data is to be plotted: linlin and loglog
# METHOD - Method of data matching: slope and value (Slope does slope matching and then a baseline correction)
        # Value matching multiplies the whole ccd100 dataset to match at the stitch point


def analysis_stitch(axisX,axisY,ccd100,ccd0,q=None,smoothing=25,window=1,regime="loglog",method="slope",debug = 0,checkpoint=0,progress=0):
    #THIS PROGRAM WAS WRITTEN BACKWARDS:
    #Originally i thought that CCD0 corresponded to lower q value, that is the opposite case
    #Bandaid Solution: Swap ccd0 and ccd100 in the ingestion phase of the program (first line) --> IMPLEMENTED
    
    #Spot to manually activate debugging mode
    #debug = 1 #Debugging Mode is on
    #checkpoint = 1
    #progress=1
    
    #make necessary copies of the dataframes so I dont end up modifying any
    ccd0working = ccd0.copy()
    ccd100working = ccd100.copy()

    #Technique 3: Replace negatives tiny value using AxisY input
    ccd0working.loc[ccd0working[axisY]<0,axisY]=1e-200
    ccd100working.loc[ccd100working[axisY]<0,axisY]=1e-200

    
    if (debug == 1):
        #DEBUG CODE: DELETE LATER
        ax1 = ccd100.plot(x =axisX, y=axisY, kind = 'line')
        ax1.set_yscale('log')
        ax1.set_xscale('log')
        ccd0.plot(x =axisX, y=axisY, kind = 'line',ax=ax1)
    
    #Instead of trimming the data sets, identify the nearest indices in both dataframes
    #sAssumes increase in the q of the X Axis
    index_max = ccd0working[axisX].idxmax() #I think this value is trivial...
    q_max = ccd0working[axisX].max()
    index_min = ccd100working[axisX].idxmin() #I think this value is trivial...
    q_min = ccd100working[axisX].min()
    
    if ((ccd0[axisX].max() > ccd100[axisX].max())or(ccd0[axisX].min() > ccd100[axisX].min())):
        raise Exception("You might have swapped ccd0 and ccd100 or one set is contained inside the other one")
        
    if ((window > 1) or (window < 0)):
        raise Exception("Window must be a value from 0 to 1 inclusive")
    
    #Calculate q if None is provided
    if ((q is None) and (regime == "loglog")):
        q = (q_max-q_min)/(np.log(q_max)-np.log(q_min))
        if (progress == 1):
            print("COMPLETED: Automatic stitchpoint is at q = " + str(q))
    elif ((q is None) and (regime == "linear")):
        q = 0.5*(q_max+q_min) #Average based on data points
        if (progress == 1):
            print("COMPLETED: Automatic stitchpoint is at q = " + str(q))
    else: 
        if (progress == 1):
            print("COMPLETED: Manual stitchpoint is at q = " + str(q))
        
    #Error Checking for q value:
    if ((q > q_max) or (q < q_min)):
        raise Exception("Chosen stitchpoint is out of bounds")
        
    #Same parameters but of the stitch point
    index_stitch_ccd0 = closest(ccd0working,axisX,q)
    index_stitch_ccd100 = closest(ccd100working,axisX,q)
    
    #Identify the Edged for the averaging based on the stitch point 
    
    if (debug == 1):
        #Debug Printing Statements
        print("index_max is " + str(index_max))
        print("q_max is " + str(q_max))
        print("index_min is " + str(index_min))
        print("q_min is " + str(q_min))
        print("Stitchpoint index in ccd0 is " + str(index_stitch_ccd0))
        print("Stitchpoint index in ccd100 is " + str(index_stitch_ccd100))
    
    #Error Check: 
    if (index_stitch_ccd0 - smoothing < 0):
        raise Exception("Smoothing window is outside data range")
    if (index_stitch_ccd100 - smoothing > len(ccd100)):
        raise Exception("Smoothing window is outside data range")
    
    if regime == "linlin":                                                                                                                                    #Arbitrary +1's are to account for matrix reading skipping the value at the last index
        #Identify Slope/intercept of the SAXS data at the stitch point
        x0 = np.array(ccd0working.iloc[(index_stitch_ccd0-(smoothing)):(index_stitch_ccd0+1)][axisX]).reshape(-1, 1)
        y0 = np.array(ccd0working.iloc[(index_stitch_ccd0-(smoothing)):(index_stitch_ccd0+1)][axisY]).reshape(-1, 1)
        lm0 = LinearRegression()
        lm0.fit(x0, y0)
        intercept_ccd0 = lm0.intercept_[0]
        coef_ccd0 = float(lm0.coef_[0])
        
        if (debug == 1): #Debugging Mode Code
            print("x0 is " + str(x0))
            print("y0 is " + str(y0))
            print("Intercept of CCD0 is "+str(intercept_ccd0))
            print("Slope of CCD0 is "+str(coef_ccd0))

        if (progress == 1):    
            print("COMPLETED: CCD0 Linear Regression")

        #Do the Same for the CCD100 data
        x100 = np.array(ccd100working.iloc[index_stitch_ccd100:(index_stitch_ccd100+(smoothing+1))][axisX]).reshape(-1, 1)
        y100 = np.array(ccd100working.iloc[index_stitch_ccd100:(index_stitch_ccd100+(smoothing+1))][axisY]).reshape(-1, 1)
        lm100 = LinearRegression()
        lm100.fit(x100, y100)
        intercept_ccd100 = lm100.intercept_[0]
        coef_ccd100 = float(lm100.coef_[0])
        if (debug == 1): #Debugging Mode Code
            print("x100 is " + str(x100))
            print("y100 is " + str(y100))
            print("Intercept of CCD100 is "+str(intercept_ccd100))
            print("Slope of CCD100 is "+str(coef_ccd100))
            print()
        if (progress == 1):
            print("COMPLETED: CCD100 Linear Regression")

        #Calculate the Slope and Intercept then Scale the WAXS Data (Linear Realm)
        if method == "value":
            alpha = ccd0working.iloc[index_stitch_ccd0][axisY]/ccd100working.iloc[index_stitch_ccd100][axisY]
            beta = "NA"
            ccd100working[axisY] = ccd100working.apply(lambda x: correction_valuematch(x[axisX],x[axisY],alpha,direction=1),'columns')
        elif method == "slope":
            alpha = coef_ccd0/coef_ccd100
            beta = correction_slopematch(q,ccd100working.iloc[index_stitch_ccd100][axisY],alpha,0) - ccd0working.iloc[index_stitch_ccd0][axisY]
            ccd100working[axisY] = ccd100working.apply(lambda x: correction_slopematch(x[axisX],x[axisY],alpha,beta,direction=1),'columns')
        else:
            raise Exception("Provided Method may be in error or is not found in this function")
    
    #Alternate version where it does the fitting as designed for a log log plot
    elif regime == "loglog": #NOTE: Done using natural logs instead of base10 logs
         #Identify Slope/intercept of the SAXS data at the stitch point
        x0 = np.array(ccd0working.iloc[(index_stitch_ccd0-(smoothing)):(index_stitch_ccd0+1)][axisX]).reshape(-1, 1)
        y0 = np.array(ccd0working.iloc[(index_stitch_ccd0-(smoothing)):(index_stitch_ccd0+1)][axisY]).reshape(-1, 1)
        lm0 = LinearRegression()
        lm0.fit(np.log10(x0), np.log10(y0))
        intercept_ccd0 = lm0.intercept_[0]
        coef_ccd0 = float(lm0.coef_[0])
        if (debug == 1): #Debugging Mode Code
            print("x0 is " + str(x0))
            print("y0 is " + str(y0))
            print("Intercept of CCD0 is "+str(intercept_ccd0))
            print("Slope of CCD0 is "+str(coef_ccd0))
        if (progress == 1):
            print("COMPLETED: CCD0 Linear Regression")

        #Do the Same for the CCD100 data
        x100 = np.array(ccd100working.iloc[index_stitch_ccd100:(index_stitch_ccd100+(smoothing+1))][axisX]).reshape(-1, 1)
        y100 = np.array(ccd100working.iloc[index_stitch_ccd100:(index_stitch_ccd100+(smoothing+1))][axisY]).reshape(-1, 1)
        lm100 = LinearRegression()
        lm100.fit(np.log10(x100), np.log10(y100))
        intercept_ccd100 = lm100.intercept_[0]
        coef_ccd100 = float(lm100.coef_[0])
        if (debug == 1): #Debugging Mode Code
            print("x100 is " + str(x100))
            print("y100 is " + str(y100))
            print("Intercept of CCD100 is "+str(intercept_ccd100))
            print("Slope of CCD100 is "+str(coef_ccd100))
            print()
        if (progress == 1):
            print("COMPLETED: CCD100 Linear Regression")

        #Calculate the Slope and Intercept then Scale the WAXS Data (Linear Realm)
        if method == "value":
            alpha = ccd0working.iloc[index_stitch_ccd0][axisY]/ccd100working.iloc[index_stitch_ccd100][axisY]
            beta = "NA"
            ccd100working[axisY] = ccd100working.apply(lambda x: correction_valuematch(x[axisX],x[axisY],alpha,direction=1),'columns')
        elif method == "slope":
            alpha = coef_ccd0/coef_ccd100
            beta =  np.log10(ccd0working.iloc[index_stitch_ccd0][axisY]) - np.log10(correction_slopematch_log(q,ccd100working.iloc[index_stitch_ccd100][axisY],alpha))
            ccd100working[axisY] = ccd100working.apply(lambda x: correction_slopematch_log(x[axisX],x[axisY],alpha,beta,direction=1),'columns')                                        
        else:
            raise Exception("Provided Method may be in error or is not found in this function")
    else:
        raise Exception("Provided Regime may be in error or is not found in this function")
            
    if (progress == 1):
        print("COMPLETED: WAXS Data Scaling")
    
    if (debug == 1): #Debugging Mode Code
        print("Alpha is "+str(alpha)+"; Beta is " + str(beta))
        if (checkpoint == 1): #Debugging Mode Code
            print("DATAFRAME CHECKPOINT: 1")
            print(ccd0working)
            print(ccd100working)
        print()
    
    if (debug == 1):#Debugging Mode Code
        #DEBUG CODE: DELETE LATER - Plots original ccd100 and the new scaled ccd100
        ax2 = ccd100.plot(x =axisX, y=axisY, kind = 'line')
        ax2.set_yscale('log')
        ax2.set_xscale('log')
        ccd100working.plot(x =axisX, y=axisY, kind = 'line',ax=ax2)

        #DEBUG CODE: DELETE LATER - Plots the new ccd100 and the new ccd0 data before stitching
        ax3 = ccd0working.plot(x =axisX, y=axisY, kind = 'line')
        ax3.set_yscale('log')
        ax3.set_xscale('log')
        ccd100working.plot(x =axisX, y=axisY, kind = 'line',ax=ax3)
    
    #Insert new data points to CCD0 in overlap region via linear interpolation based on the overlapping q from WAXS
    #All work in this section done on the working CCD0 dataframe
    overlap_max = closest_ordered(ccd100working,axisX,q_max) #Establish the index in CCD100 that corresponds to the
    #print("overlap_max is " + str(overlap_max))
    for index100 in range(0,overlap_max+1):
        temp_q = ccd100working.iloc[index100][axisX]
        index0 = closest_ordered(ccd0working,axisX,temp_q)
        if (temp_q == ccd0working.iloc[index0][axisX]):
            continue
        newvalue = interpol_lin(ccd0working.iloc[index0][axisX],ccd0working.iloc[index0][axisY],ccd0working.iloc[index0+1][axisX],ccd0working.iloc[index0+1][axisY],temp_q)
        ccd0working = pd.DataFrame(np.insert(ccd0working.values, (index0+1), [temp_q,newvalue], axis=0),columns = ccd0working.columns)
        if (checkpoint == 1): #Debugging Mode Code
            print(str(temp_q)+ " was most closely found at index "+ str(index0))
    if (progress == 1):
        print("COMPLETED: CCD0 Linear Interpolation")
    
    if (checkpoint == 1): #Debugging Mode Code
        print("DATAFRAME CHECKPOINT: 2")
        print(ccd0working)
        print(ccd100working)
        print()
    
    #Update Important INDEXES now that the blank values have been added
    index_max = ccd0working[axisX].idxmax() #I think this value is trivial...
    index_min = ccd100working[axisX].idxmin() #I think this value is trivial...
    index_stitch_ccd0 = closest_ordered(ccd0,axisX,q)
    index_stitch_ccd100 = closest_ordered(ccd100,axisX,q)
    
    if (debug == 1): #Debugging Mode Code
        print("Stitchpoint index in ccd0 is " + str(index_stitch_ccd0))
        print("Stitchpoint index in ccd100 is " + str(index_stitch_ccd100))
        
    #Add blank spots into the ccd100 data
    overlap_min = closest_ordered(ccd0,axisX,q_min) #Establish the index in CCD0 that corresponds to the minimum of the overlap
    
    if (debug == 1): #Debugging Mode Code
        print("overlap_min is " + str(overlap_min))
    for index0 in range(overlap_min+1,index_max+1):  
        temp_q = ccd0working.iloc[index0][axisX]
        index100 = closest_ordered(ccd100working,axisX,temp_q)
        if (temp_q == ccd100working.iloc[index100][axisX]):
            continue
        newvalue = interpol_lin(ccd100working.iloc[index100][axisX],ccd100working.iloc[index100][axisY],ccd100working.iloc[index100+1][axisX],ccd100working.iloc[index100+1][axisY],temp_q)
        ccd100working = pd.DataFrame(np.insert(ccd100working.values, (index100+1), [temp_q,newvalue], axis=0),columns = ccd100working.columns)
        if (checkpoint == 1): #Debugging Mode Code
            print(str(index100) + " out of length: " + str(len(ccd100working)))
            print(str(temp_q)+ " was most closely found at index "+ str(index0))
    if (progress == 1):
        print("COMPLETED: CCD100 Linear Interpolation")
    
    #Update the values again as I've added numbers again
    index_max = ccd0working[axisX].idxmax() #I think this value is trivial...
    q_max = ccd0working[axisX].max()
    index_min = ccd100working[axisX].idxmin() #I think this value is trivial...
    q_min = ccd100working[axisX].min()
    index_stitch_ccd0 = closest_ordered(ccd0working,axisX,q)
    index_stitch_ccd100 = closest_ordered(ccd100working,axisX,q)
    
    #Determine the maximal edges of the blended data range in the CCD0 dataframe
    distance = (min(q-q_min,q_max-q))*window
    window_minQ = q-distance
    window_maxQ = q+distance
    window_minI = closest_ordered(ccd0working,axisX,window_minQ)
    window_maxI = closest_ordered(ccd0working,axisX,window_maxQ)

    if (debug == 1): #Debugging Mode Code
        if (checkpoint == 1): #Debugging Mode Code
            print("DATAFRAME CHECKPOINT: 3")
            print(ccd0working)
            print(ccd100working)
        print("window_minQ is "+str(window_minQ))
        print("window_maxQ is "+str(window_maxQ))
        print()

    #Weighted averaging based on the Windowed parameter
    #NOTE This line of code only works if the previous one has successfully 
    #added all the correct q's
    for index0 in range(window_minI,(window_maxI+1)):
        index100 = closest_ordered(ccd100working,axisX,ccd0working.iloc[index0][axisX]) #Error is here I think
        weight = fraction_symm(ccd0working.iloc[index0][axisX],window_minQ,window_maxQ)
        ccd0working.iloc[index0][axisY] = ((1-weight)*(ccd0working.iloc[index0][axisY]))+((weight)*(ccd100working.iloc[index100][axisY]))
        if (checkpoint == 1): #Debugging Mode Code
            print("Current Index0: "+str(index0)+"; Current Index100: "+str(index100))
            print("ccd0 is "+str(ccd0working.iloc[index0][axisX])+"; ccd100 is "+str(ccd100working.iloc[index100][axisX])+";")
            print("Weight is " + str(weight) +"; Blended is " +str(ccd0working.iloc[index0][axisY]))
            print()

    if (progress == 1):
        print("COMPLETED: Weighted Data Blending")
    
    ccd0working = ccd0working[:window_maxI+1] #Trim off excess CCD0 data above the window
    #Will be added later in concatenation
    
    if (checkpoint == 1): #Debugging Mode Code
        print("DATAFRAME CHECKPOINT: 4")
        print(ccd0working)
        print(ccd100working)
        print()
    
    #Concatenate the remaining working ccd100 data to the now blended ccd0 data
    overlap_max_index100 = closest_ordered(ccd100working,axisX,window_maxQ)
    
    final = pd.concat([ccd0working, ccd100working[overlap_max_index100+1:]])
    final.reset_index(drop=True,inplace=True)
        
    if (checkpoint == 1): #Debugging Mode Code
        print("DATAFRAME CHECKPOINT: FINAL")
        print(final)
        print()
    
    return  final

### Analysis: sample_stitch two lists of dataframes

In [83]:
#Desc: This function ingests two datasets and their lists of energies and then combines them using the analysis_stitch 
#function. Accepts 2 lists of dataframes and their associated lists of energies
def sample_stitch(axisX,axisY,ccd0_data,ccd0_energy,ccd100_data,ccd100_energy,q=None,smoothing=1,window=1,regime="loglog",method="slope"):
    #Create copies
    #print("ccd0_data length: "+str(len(ccd0_data)))
    #print("ccd0_energies length: "+str(len(ccd0_energy)))
    #print("ccd100_data length: "+str(len(ccd100_data)))
    #print("ccd100_energies length: "+str(len(ccd100_energy)))

    
    #Size Checks for CCD0 and CCD100
    if (len(ccd0_data) != len(ccd0_energy)):
        raise Exception("Mismatching energies and data for ccd0")
    if (len(ccd100_data) != len(ccd100_energy)):
        raise Exception("Mismatching energies and data for ccd100")
    
    #Create common energies list and trim off the extra energies from the imported datasets
    energies = intersection(ccd0_energy,ccd100_energy)
    
    #Trim the ccd0 dataset
    for index in range(len(ccd0_energy)):
        if not (ccd0_energy[index] in energies):
            del ccd0_data[index]
    
    #Trim out ccd100 dataframes
    for index in range(len(ccd100_energy)):
        if not (ccd100_energy[index] in energies):
            del ccd100_data[index]
    
    #Create blank list for stitching dataframes INTO
    stitched = []

    #Progress Bar:
    #bar1 = tqdm(total=len(energies), position=0, dynamic_ncols=True, leave=True, unit='dataframe', desc="Stitching In Progress", bar_format='{l_bar}{bar:50}| {n_fmt}/{total_fmt} [{elapsed}]')
    #Loop to Access Each Dataframe in the list
    for index in tqdm(range(len(energies)),desc="Sample In Progress",colour='blue'):
    #for index in tqdm(range(len(energies)), position=0, leave=True, unit='dataframe', desc="Stitching In Progress", bar_format='{l_bar}{bar:50}| {n_fmt}/{total_fmt} [{elapsed}]'):
        temp_df = analysis_stitch(axisX,axisY,ccd0_data[index],ccd100_data[index],q,smoothing,window,regime)
        stitched.append(temp_df)
        #bar1.update(int(1))
        #print(str(index+1)+"/"+str(len(energies))+" dataframes complete")
        #print()
        #Stich the dataframes into the new list
        
    #Return the new stuff as tuples
    return (energies, stitched)

### analysis_insertpoints: alter original dataframes to have the same x points

In [84]:
# Function accepts 2 dataframes and ALTERS THEIR ORIGINALS to have the same x points using linear interpolation
# Function has a future option function to add the lagging ends on either side 


def analysis_insertpoints(df1,df2,axisX,axisY,edges=False):

    #dataframe parameters:
    max_1 = df1[axisX].max()
    max_2 = df2[axisX].max()
    min_1 = df1[axisX].min()
    min_2 = df2[axisX].min()
    overlap_max = min(max_1,max_2)
    overlap_min = max(min_1,min_2)
    overall_max = max(max_1,max_2)
    overall_min = min(min_1,min_2)
    df1_idxmin = closest_ordered(df1,axisX,overlap_min)
    df1_idxmax = closest_ordered(df1,axisX,overlap_max)
    df2_idxmin = closest_ordered(df2,axisX,overlap_min)
    df2_idxmax = closest_ordered(df2,axisX,overlap_max)

    #overlap fraction check
    overlapfrac = ((overlap_max-overlap_min)/(overall_max-overall_min))

    #print(overlapfrac)

    if (overlapfrac<0):
        return Exception("There is no overlap between these dataframes, check again")
    if (overlapfrac<0.5):
        print("Your dataframes only overlap " + str(overlapfrac*100)+"%, are you sure?")
    #if ((overlapfrac)and(overlapfrac >=0)):
    #    return Exception("There is no overlap between these dataframes, check again")


    #Ideally Want to run the code as plastic as possible so the df1 and df2 don't have to be in order

    for index1 in range(df1_idxmin+1,df1_idxmax+1):
        temp_X = df1.iloc[index1][axisX]
        index2 = closest_ordered(df2,axisX,temp_X)
        if (temp_X == df2.iloc[index2][axisX]):
            continue
        newvalue = interpol_lin(df2.iloc[index2][axisX],df2.iloc[index2][axisY],df2.iloc[index2+1][axisX],df2.iloc[index2+1][axisY],temp_X)
        df2 = pd.DataFrame(np.insert(df2.values, (index2+1), [temp_X,newvalue], axis=0),columns = df2.columns)
    
    for index2 in range(df2_idxmin+1,df2_idxmax+1):
        temp_X = df2.iloc[index2][axisX]
        index1 = closest_ordered(df1,axisX,temp_X)
        if (temp_X == df1.iloc[index1][axisX]):
            continue
        newvalue = interpol_lin(df1.iloc[index1][axisX],df1.iloc[index1][axisY],df1.iloc[index1+1][axisX],df1.iloc[index1+1][axisY],temp_X)
        df1 = pd.DataFrame(np.insert(df1.values, (index1+1), [temp_X,newvalue], axis=0),columns = df1.columns)


    return (df1,df2,overlapfrac)

### analysis_baseline: 
Flat addition or subtraction of a data set

In [85]:

#IN DEVELOPMENT
def analysis_baseline(raw,base,axisX,axisY,flat=0,dir=-1):

    f_raw,f_base,f_overlap = analysis_insertpoints(raw,base,axisX,axisY)

    base_min = base[axisX].min()
    base_max = base[axisX].max()

    #print("f_base:")
    #print(f_base)

    if (f_overlap<0.80):
        print("WARNING: Baseline only covers "+str(f_overlap*100)+"% percent of the raw data")
    if (not((dir==1)or(dir==-1))):
        raise Exception("Direction of the adjustment can only be up (1) or down (-1)")

    for index in range(len(f_raw)):
        tempX = f_raw.iloc[index][axisX]
        #print("Current Position --- "+ str(index))
        #print(f_base.iloc[index][axisX]<base_min)
        if f_raw.iloc[index][axisX] < base_min:
            #print("Statement 1")
            base_idx = 0
        elif f_raw.iloc[index][axisX] > base_max:
            #print("Statement 2")
            base_idx = len(f_base)-1
        else:
            #print("Statement 3")
            base_idx = closest_ordered(f_base,axisX,tempX)
        adjust = (flat + f_base.iloc[base_idx][axisY])*dir
        f_raw.iloc[index][axisY] = f_raw.iloc[index][axisY] + adjust

    return f_raw

### analysis_opvsingle: Jsc, Voc, FF, PCE
Intake a single JV curve and output a tuple containing 

In [86]:

#FUTURE: Consider whether to absolute value the VOC or to leave it as is. Based on PRECEDENT I should absolute value it
#ATM I have this setup to accept a name and channel if provided
def analysis_opvsingle(df,area,powerin,name="TBD",channel="Z",trim=1):
    #print(df)
    df_trim = df.iloc[1:-7].copy()
    df_trim = df_trim.reset_index(drop=True)
    #print(df_trim)

    #Modify the 'J' (current) column to be current density in mA/cm^2
    df_trim['J'] = df_trim['J'].apply(lambda x: x*1000/area) #Change units to ones we know, mA/cm^2
    #Note: test_sample is a list of dataframe, so test_sample[#] is a dataframe

    #Task Locate Voc and Jsc by nearest whole value/index
    jsc_idx = closest(df_trim,"V",0)
    #jsc_idx = closest(df,"V",0)
    jsc = df_trim["J"][jsc_idx]
    #print(jsc_idx)
    #print("Jsc = " + str(jsc))
    voc_idx = closest(df_trim,"J",0)
    #voc_idx = closest(df,"J",0)
    voc =  df_trim["V"][voc_idx]
    #print(voc_idx)
    #print("Voc = " + str(voc))
    p_theory = jsc*voc #units of mW/cm^2
    #print("Ptheory = " + str(p_theory))

    #Create a new column with power shown as a function of voltage/current
    df_trim['P'] = df_trim['V'] * df_trim['J']
    #print("")
    #print("New Dataframe")
    #print(df)
    #df1 = df.iloc[jsc_idx:voc_idx]
    #print(df1)

    #Boolean check for order of Jsc and voc idx --> Needed for zoomed check of max power point
    #Note; this function needs to be able to en masse go through data. So I will eventually need a way to avoid errors 
    if (jsc_idx <= voc_idx):
        idx_A = jsc_idx
        idx_B = voc_idx
    if (jsc_idx > voc_idx):
        idx_B = jsc_idx
        idx_A = voc_idx


    #print(df['P'].iloc[jsc_idx:voc_idx].idxmin())
    #pmax_idx = df['P'].iloc[jsc_idx:voc_idx].idxmin() #task locate the index of the max power point
    pmax_idx = df_trim['P'].iloc[idx_A:idx_B].idxmin() #task locate the index of the max power point

    p_max = df_trim['P'].iloc[pmax_idx]
    #print(p_max)

    pce = -100*(p_max/(powerin*1000)) #calculate PCE
    #print("PCE = " + str(pce)+"%")
    ff = (p_max/p_theory)
    #print("FF = " + str(ff))

    #task Export all values as a dictionary
    f_opv = {
        "name": name,
        "channel": channel,
        "pce": round(pce,3),
        "jsc": round(jsc,3),
        "voc": round(abs(voc),3),
        "ff": round(ff,3)
    }

    return f_opv
    
#Still must correct for flipped order Jsc, VOC --> Basic boolean?)

### analysis_opvset: Takes in folder structure with samples, calculates performance parameters and outputs as CSV

In [133]:
def analysis_opvset(folder, area, pin,vfirst = 1,export=1,exportpath="",plot=0,f_trim=1,max_channels=6):
    #output = pd.DataFrame()
    output = pd.DataFrame(columns=["name","channel",'pce','jsc','voc','ff'])

    #Check just in case receiver data reverses the current and voltage
    if vfirst == 1:
        col1 = "V"
        col2 = 'J'
    else:
        col1 = 'J'
        col2 = 'V'
    
    #Error Checking
    #if not (folder[-1] == "/"):
    #    raise Exception("Remember to include a trailing slash on the folder directory")
    process_directory(folder) #Check to make sure everything has .dat on it

    for dirpath, dirnames, filenames in os.walk(folder):
        for filename in filenames:
            if filename.endswith('.dat') and (not (filename.startswith('.') or filename.startswith('dark'))):
                file_path = os.path.join(dirpath, filename)
                folder_name = os.path.basename(dirpath)
                name, ext = os.path.splitext(filename)
                print(str(folder_name)+"/"+str(filename)+" --> Reading")
                df1 = pd.read_csv(file_path, comment='#',sep='\t',usecols = [0,1], names = [col1,col2],skiprows=1)
                #temp_cell = analysis_opvsingle(df1,area,pin,folder_name,channel=string.ascii_lowercase[((int(name[-1])-1)%max_channels)+1],trim=f_trim)
                #print(name.split('_')[-1])
                print(((int(name.split('_')[-1])-1) % max_channels))
                print(string.ascii_lowercase[((int(name.split('_')[-1])-1) % max_channels)])
                temp_cell = analysis_opvsingle(df1,area,pin,folder_name,channel=string.ascii_lowercase[((int(name.split('_')[-1])-1) % max_channels)],trim=f_trim)
                print(temp_cell)
                #ChatGPT convert_filename = lambda name, max_channels=6: string.ascii_uppercase[(int(name.split('_')[-1]) - 1) % max_channels] 
                #df2 = pd.DataFrame(temp_cell)
                #df2 = pd.DataFrame(analysis_opvsingle(df1,area,pin,folder_name,channel=string.ascii_uppercase[int(name[-1])-1]))
                #output = pd.concat([output, df2], ignore_index=True)
                output = pd.concat([output,pd.DataFrame([temp_cell])],ignore_index=True)
                # df = pd.concat([df, pd.DataFrame([data_dict])], ignore_index=True) #sample code
                #print(df2)
                #process_dat_file(file_path)
                #print(f"File: {filename}, Folder: {folder_name}")
                print(str(folder_name)+"/"+str(filename)+" --> Completed")

    output_sorted = output.sort_values(by=['name','channel'])
    output_sorted = output_sorted.reset_index(drop=True)
    print("**OPV ANALYSIS COMPLETE**")     


    #Error Checking
    if len(output)==0:
        raise Exception("You didn't extract any data, check to see if you have the correct folder directory")

    #print(output.head())
    #print(output.head()

    if export == 1:
        path="Data_Export/"+exportpath
        filepath = Path(path)
        filepath.mkdir(parents=True, exist_ok=True)
        output_sorted.to_csv(path/Path("opvstats.csv"), index = False)


    return output_sorted

## Combination Functions: Upper Hierarchy handling

### session_stitch: Processing of an entire set of data and energies

In [None]:
def session_stitch(list_data,list_energies,selectenergies,singleenergy,f_smooth=10,f_window=1,f_method='slope',f_regime='loglog',xaxis='q',yaxis='Intensity',f_suffix=""):
    print("WARNING: Function is meant as an OVERVIEW summary data processing technique, so most functions called within use default values. If you wish to customize fit parameters either update this function,\
    or simply copy the loop structure contained within and customize it in working code")
    print("Code has also not been optimized to account for differing sorting order when importing USE AT OWN RISK")

    #Size Checks for data and energies
    if (len(list_data) < len(list_energies)):
        raise Exception("You have more energies lists than data lists")
    if (len(list_data) > len(list_energies)):
        raise Exception("Mismatching energies and data for ccd100")
    

    length = len(list_data)
    #print(length)
    if length % 2 != 0:
        raise Exception("Uneven number of dataframes supplied, check for missing CCD0 or CCD100")

    for i in tqdm(range(int(length/2)),desc="Session Progress",colour='red'):
        t_energies,t_data = sample_stitch(xaxis,yaxis,globals()[list_data[2*i+1]],globals()[list_energies[2*i+1]],globals()[list_data[2*i]],globals()[list_energies[2*i]],\
            smoothing=f_smooth,window=f_window,method=f_method,regime=f_regime)
        t_name = list_data[2*i][:-7] +"_"+f_suffix
        t_path = t_name + "/"
        #print(t_name) #Debug Code
        export_csv(t_name,t_data,t_energies)
        plot_allenergies(t_name,t_data,t_energies,path=t_path)
        plot_selectenergy(t_name,t_data,t_energies,selectenergies,path=t_path)
        plot_singleenergy(t_name,t_data,t_energies, singleenergy,path=t_path)
        

    return 

### session_opv: Processing of an entire day of opv data from hierarchies

In [None]:
#TBD code for session importing and exporting
test = "Test"

print(7%7)

0


# Troubleshooting and Clipboard Code

## Template Analysis Code:

#### OPV Analysis Function

In [134]:
#Temporary test cell for OPV Data Analysis function
#test_sample = import_data_dat("light_1","20230901/ADoE_04","",".dat",independent="V",dependent="J")
#print("ADoE_4 (A) has " + str(len(test_sample)) + " dataframes in it")
area = 0.162 #cm^2 0.172
#p_in = 0.0984 # W/cm^2
p_in = 0.09676 # W/cm^2 0.0992
#print(analysis_opvsingle(test_sample,area,p_in,"ADoE_4 (A)"))
#test = pd.read_csv("20230901_Debug/ADoE_04/light_1.dat", comment='#',sep='\t',usecols = [0,1], names = ['V','J'],skiprows=1)
#print("OUTSIDE FUNCTION")
#print(test)
#test2 = test.iloc[1:-1].copy()
#test2 = test2.reset_index(drop=True)
#print(test2)
#print("FUNCTION TEST")
#test_res = analysis_opvsingle(test2,area,p_in,name = "Debug")
#test_res2 = analysis_opvsingle(test,area,p_in,name = "Debug")
#print(test_res)
#print(test_res2)

samples = analysis_opvset("20210722",area,p_in,vfirst=1)


3/light_2.dat --> Reading
1
b
{'name': '3', 'channel': 'b', 'pce': 10.276, 'jsc': 21.568, 'voc': 0.77, 'ff': 0.598}
3/light_2.dat --> Completed
3/light_22.dat --> Reading
3
d
{'name': '3', 'channel': 'd', 'pce': 9.863, 'jsc': 21.304, 'voc': 0.79, 'ff': 0.567}
3/light_22.dat --> Completed
3/light_23.dat --> Reading
4
e
{'name': '3', 'channel': 'e', 'pce': 10.445, 'jsc': 20.881, 'voc': 0.78, 'ff': 0.62}
3/light_23.dat --> Completed
3/light_3.dat --> Reading
2
c
{'name': '3', 'channel': 'c', 'pce': 11.025, 'jsc': 21.417, 'voc': 0.77, 'ff': 0.647}
3/light_3.dat --> Completed
3/light_1.dat --> Reading
0
a
{'name': '3', 'channel': 'a', 'pce': 8.265, 'jsc': 20.112, 'voc': 0.76, 'ff': 0.523}
3/light_1.dat --> Completed
3/light_21.dat --> Reading
2
c
{'name': '3', 'channel': 'c', 'pce': 11.44, 'jsc': 21.06, 'voc': 0.78, 'ff': 0.674}
3/light_21.dat --> Completed
3/light_20.dat --> Reading
1
b
{'name': '3', 'channel': 'b', 'pce': 11.026, 'jsc': 21.392, 'voc': 0.79, 'ff': 0.631}
3/light_20.dat -->

  output = pd.concat([output,pd.DataFrame([temp_cell])],ignore_index=True)


In [107]:
print(samples)
samples_sorted = samples.sort_values(by=['name'])
samples_sorted = samples_sorted.reset_index(drop=True)
#print(samples_sorted)

    name channel     pce     jsc   voc     ff
0      1       a  10.929  21.875  0.81  0.597
1      1       a  10.592  22.573  0.81  0.560
2      1       a  11.584  25.955  0.81  0.533
3      1       a  11.447  23.506  0.81  0.582
4      1       b  11.008  20.952  0.79  0.643
..   ...     ...     ...     ...   ...    ...
607    9       e   8.359  20.866  0.76  0.510
608    9       z   8.857  23.325  0.78  0.471
609    9       z   9.260  24.618  0.75  0.485
610    9       z   8.396  23.204  0.75  0.467
611    9       z   8.344  20.781  0.76  0.511

[612 rows x 6 columns]


In [None]:
t_folder = "./Test_RawData/"
t_prefix = "8257_"
t_suffix = "_100_180_10.dat"
x = 'q'
y = 'Intensity'

#session_data, session_energies = import_set(t_folder,t_prefix,t_suffix,x,y)
#print()
#print(type(Ace_2_A_CCD0))
#print(Ace_2_A_CCD0['q'])
#t_energies,t_stitched = sample_stitch('q','Intensity',SW_0F_A_CCD0,SW_0F_A_CCD0_energies,SW_0F_A_CCD100,SW_0F_A_CCD100_energies,smoothing=10)


#print(session_data)
#print(session_energies)
#print(pm6y6_high_2_a_CCD0)
print("BLANKBLANK")



#print(pm6y6_high_2_a_CCD100)
#t_energies,t_stitched = sample_stitch('q','Intensity',pm6y6_low_12_a_CCD0,pm6y6_low_12_a_CCD0_energies,pm6y6_low_12_a_CCD100,pm6y6_low_12_a_CCD100_energies,smoothing=10)
#plot_selectenergy('TEST',t_stitched,t_energies,[250.0,282.5,284.8,286.5,400.0])
#plot_selectenergy(PM6-Y6_HIGH_1-2_A_CCD0[0],PM6-Y6_HIGH_1-2_A_CCD0_energies[0])


['SW_0F_A_CCD100', 'SW_0F_A_CCD0']



ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html

## FIGURE GENERATION

In [None]:
'''#Working Session export code)
#session_stitch(sample_data[:2],sample_energies[:2],[280.0,282.0,284.2,285.5],285.1)
#sample_stitch('q','Intensity',Ace_2_A_CCD0,Ace_2_A_CCD0_energies,Ace_2_A_CCD100,Ace_2_A_CCD100_energies)
#print(len(Ace_0p5_A_CCD0))
#print(len(Ace_0p5_A_CCD100))
#print(PM6_B_CCD100_energies)
#print(len(Ace_0p5_A_CCD100_energies))
session_stitch(session_data,session_energies,[280.0,282.5,284.8,286.5],285.1,f_smooth=25,f_suffix="180")
'''

Code has also not been optimized to account for differing sorting order when importing USE AT OWN RISK


Session Progress:   0%|          | 0/7 [00:00<?, ?it/s]

Sample In Progress:   0%|          | 0/28 [00:00<?, ?it/s]

Sample In Progress:   0%|          | 0/28 [00:00<?, ?it/s]

Sample In Progress:   0%|          | 0/28 [00:00<?, ?it/s]

Sample In Progress:   0%|          | 0/30 [00:00<?, ?it/s]

Sample In Progress:   0%|          | 0/28 [00:00<?, ?it/s]

Sample In Progress:   0%|          | 0/28 [00:00<?, ?it/s]

Sample In Progress:   0%|          | 0/28 [00:00<?, ?it/s]

CCD100 Check
[269.9, 280.0, 281.0, 282.5, 282.0, 283.2, 283.5, 283.0, 284.2, 284.5, 284.8, 284.0, 285.2, 285.4, 285.5, 285.8, 285.0, 286.5, 286.0, 287.5, 287.0, 288.5, 288.0, 289.5, 289.0, 290.0, 295.0, 300.0, 310.0, 320.0]
30
30
CCD0 Check
[270.0, 280.0, 281.0, 282.5, 282.0, 283.2, 283.5, 283.0, 284.2, 284.5, 284.8, 284.0, 285.2, 285.4, 285.5, 285.8, 285.0, 286.5, 286.0, 287.5, 287.0, 288.5, 288.0, 289.5, 289.0, 290.0, 295.0, 300.0, 310.0, 320.0]
30
30


Sample In Progress:   0%|          | 0/29 [00:00<?, ?it/s]

TypeError: reduction operation 'argmax' not allowed for this dtype