In [1]:
import pandas as pd
import numpy as np
import csv
import re

In [2]:
# Base directory of the test data. Every path below is derived from it, so the
# notebook can be pointed at another location by editing this single line.
test_data_dir = "C:\\Users\\Bartek\\OneDrive\\Documents\\Programming\\Python\\Test data"

test_file_plate_2_1 = test_data_dir + "\\Protein titration LissamineFA_22267.csv"
test_file_plate_2_2 = test_data_dir + "\\Protein titration LissamineFA_22741.csv"
test_file_list_A = test_data_dir + "\\Outputfiles\\csv\\listA.csv"

In [3]:
# Regular expressions for the "Results for ... P channel" / "... S channel"
# lines of the export; the group captures the text between "Results for"
# and the channel letter.
pattern_p = r"Results for (.*) P channel"
pattern_s = r"Results for (.*) S channel"

In [4]:
def read_in(file, pattern=None, column='p'):
    """Read one channel's results table from a raw csv export into a one-column data frame.

    The file is first scanned with the ``csv`` module to find the line whose
    first cell matches ``pattern`` and the 'Exported with EnVision Workstation'
    line. Those two line numbers give the ``skiprows`` and ``skipfooter``
    values passed to ``pandas.read_csv``. The table that is read is then
    flattened row by row so that every well gets its own row.

    :param file: Raw data file in csv format
    :type file: str
    :param pattern: Regular expression searched in the first cell of every
        row; the table header is expected on the line after the match.
        ``None`` (default) uses the notebook-level ``pattern_p``.
    :type pattern: str or None
    :param column: Name given to the single value column of the result
    :type column: str
    :return: 384 x 1 data frame indexed by well ID (row label + column label, e.g. 'A01')
    :rtype: pandas.DataFrame
    :raises ValueError: if no line matches ``pattern`` before the
        'Exported with EnVision Workstation' line, or if that line is missing
    """
    if pattern is None:
        pattern = pattern_p   # notebook-level default (P channel)

    # Offset from the matched title line to the last line pandas should read.
    # NOTE(review): presumably 1 header line + 16 plate rows + 1 trailing blank
    # line -- confirm against the export format before changing.
    table_span = 18
    n_wells = 384   # size the flattened table must have

    skiprows = None
    skipfooter = None

    # Same encoding as the read_csv call below, so both passes see the same text;
    # the context manager closes the handle once the scan is done.
    with open(file, 'r', encoding='utf-8') as handle:
        reader = csv.reader(handle, delimiter=',')   # converts each line into a list
        for row in reader:
            if not row:   # blank lines become empty lists which cannot be indexed
                continue

            if re.search(pattern, row[0]) is not None:
                # .line_num is the number of lines read so far, i.e. everything
                # up to and including the matched title line gets skipped
                skiprows = reader.line_num

            if re.search(r"Exported with EnVision Workstation", row[0]) is not None:
                if skiprows is None:
                    raise ValueError(
                        "no line matching {!r} found before the 'Exported with "
                        "EnVision Workstation' line in {!r}".format(pattern, file)
                    )
                skipfooter = reader.line_num - (skiprows + table_span)

    if skiprows is None:
        raise ValueError("no line matching {!r} found in {!r}".format(pattern, file))
    if skipfooter is None:
        raise ValueError(
            "'Exported with EnVision Workstation' line not found in {!r}".format(file)
        )

    df = pd.read_csv(file, sep=',', index_col=0, engine='python',
                     skiprows=skiprows, skipfooter=skipfooter, encoding='utf-8')
    df = df.drop(columns=df.columns[-1])   # the last column contains 'NaN' values only

    # well IDs in row-major order, matching the order in which the values are flattened below
    well_IDs = [index_name + col_name for index_name in df.index for col_name in df.columns]

    data = df.to_numpy().reshape(n_wells, 1)

    return pd.DataFrame(data=data, index=well_IDs, columns=[column])

In [5]:
# Read the first test plate; the resulting data frame is the cell output.
read_in(file=test_file_plate_2_1)

Unnamed: 0,p
A01,26329.0
A02,31935.0
A03,26748.0
A04,26750.0
A05,27792.0
...,...
P20,
P21,
P22,
P23,


In [6]:
def g_factor(file):
    """Return the value of g-factor as a float

    Every row of the file is inspected; a row qualifies when its first cell
    contains the text 'G-factor' and it has a fifth cell, which is converted
    to float. If several rows qualify, the last one wins.

    :param file: raw data file in csv format
    :type file: str
    :return: the g-factor read from the file
    :rtype: float
    :raises ValueError: if no qualifying row exists, or if the fifth cell of a
        qualifying row cannot be converted to float
    """
    g = None

    # the context manager closes the file once the scan is done
    with open(file, 'r') as handle:
        for row in csv.reader(handle, delimiter=','):
            # The value is taken from row[4], so the row needs at least five
            # cells; shorter rows mentioning 'G-factor' are skipped.
            if len(row) > 4 and "G-factor" in row[0]:
                g = float(row[4])

    if g is None:
        raise ValueError("no 'G-factor' row with a value found in {!r}".format(file))
    return g

In [7]:
# Look up the g-factor stored in the first test plate file.
g_factor(file=test_file_plate_2_1)

1.0