In [1]:
import pandas as pd
import os
import json
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
import statsmodels.stats.multitest as smm
from scipy import stats
import seaborn as sns

from Bio import Seq

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None

# Show full cell contents (the long file paths below) when displaying DataFrames.
# NOTE(review): a width of 0 is presumably treated as "no truncation" by the pandas
# version in use; newer pandas versions document None for that purpose -- confirm.
pd.set_option('display.max_colwidth', 0)

Change the following parameters to your output directory (where you want output files to be written), your metafile, and TNseq output. 

If these files aren't in the same directory as this notebook, you need to use the full path. For example, the full path of 'barseq_output' would be '/usr2/people/clairedubin/barseq/barseq_output'

Also change the control and experimental temperatures if needed.

In [2]:
# Directory that holds the BarSeq FASTQ files (one FASTQ per competition sample).
# It is interpolated into the `!ls` shell command in the next code cell.
FASTQ_directory = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/BarSeq_FASTQs_13k_mutants_03_27_2024'


## Barseq metafile

You'll need to create your own metafile - the easiest way is to edit it in Excel. A sample metafile is located at: /usr2/people/clairedubin/barseq/Kluyv_BarSeq_metadata.txt

    Fastq: path to barseq FASTQ for each competition
    SampleName: unique name for each competition, MUST include ctrl_temp or exp_temp as specified above
    DualIndex: index from sequencing (in read name lines of FASTQ and the sequencing info file that Adam sends)
    
    UsePrecounted: True if you've already run the Barseq counting script on a competition, False otherwise
    
    poolfile: path to Tnseq output file, should be the same for each competition
    output_dir: path to output directory
    minRandom: number of bases between start of read and DualIndex, usually in sequence of 1,2,3,4,1,2,3,4 for each sample
    maxRandom: same as minRandom
    
    BeforeBarcode: sequence preceding all barcodes
    AfterBarcode: sequence following all barcodes
    BarcodeLengths: lengths of barcodes to search for, with expected length of barcode first

    

In [3]:
# List the FASTQ files found in FASTQ_directory ({...} interpolates the Python variable).
!ls {FASTQ_directory}/*fastq

/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/BarSeq_FASTQs_13k_mutants_03_27_2024/KFRB007_S1_L001_R1_001.fastq
/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/BarSeq_FASTQs_13k_mutants_03_27_2024/KFRB008_S1_L001_R1_001.fastq


In [4]:
# One tuple per sample: (sample_name, fastq_path, dual_index, offset).
#   sample_name : unique name of the competition; becomes a row in the metafile
#   fastq_path  : FASTQ for that sample, built from FASTQ_directory so the directory
#                 only has to be changed in one place
#   dual_index  : index as reported by the sequencer (see the FASTQ read-name lines)
#   offset      : written to both minRandom and maxRandom in the metafile; kept as a
#                 string because the metafile row is assembled with '\t'.join
# Add or delete lines as needed.
sample_info = [
    ('KFRB007_1', os.path.join(FASTQ_directory, 'KFRB007_S1_L001_R1_001.fastq'), 'ATCACG', '1'),
    ('KFRB008_1', os.path.join(FASTQ_directory, 'KFRB008_S1_L001_R1_001.fastq'), 'CGATGT', '2'),
]

In [5]:
# Print the first lines of one FASTQ to inspect the read layout around the barcode.
!head /usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/BarSeq_FASTQs_13k_mutants_03_27_2024/KFRB007_S1_L001_R1_001.fastq

@M07716:211:000000000-DNL8H:1:1101:15729:1332 1:N:0:ATCACG
TGCACTAGTCGACCTGCAGCGTACGAAAGGGCAGGTGAGGAAGGCAGAGACCTCGTGGACATCAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAAAAA
+
CCCCCFFFFFCCGGGGGGGGGGGGG2FGG2A22E2FG5223B111BEGHHHGHHGHHGGHHHHHHHHHHGGGGHHHHHHHGHHHGHHHHHHHHHHHHHHHHHGGHEGHHGGHHHHGGGHGHHHHHHHHHHHHGGGGG><-
@M07716:211:000000000-DNL8H:1:1101:15609:1335 1:N:0:ATCACG
TGCACTAGTCGACCTGCAGCGTACGACATCGGAAATTAGATGCCTAGAGACCTCGTGGACATCAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAAAAA
+
ABCCCFFFFFCCGGGGGGGGGGGGGGGGGHGGFFCHHGHHHHHHHHHHHHHGHHGHHGGHHHHHHHHHHGGGGHHHHHHHGHHHGHHHHHHHHHHHHHHHHHGFHFHHHGGHHHHGGGHGHHHHHHHHHHFHGGGGF><<
@M07716:211:000000000-DNL8H:1:1101:15808:1335 1:N:0:ATCACG
TGCACTAGTCGACCTGCAGCGTACGGAAGGGGTTCTATCTTTCCAAGAGACCTCGTGGACATCAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAAAAA


### To convert the annotated poolfile from tab-separated to comma-separated CSV (execute the next three cells only if necessary)

In [6]:
# Annotated TnSeq poolfile. Despite the .csv extension it is parsed as tab-separated here.
poolfile1 = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/Batch_2_sequencing_Apr_26_2024/TNSeq_13k_mutants_mapping_output_without_vector_sequence_jumpstart/TNSeq_13k_mutants_mapping_output_jumpstart_polymerase_Apr_30_2024_2_fastq_combined/Tnseq_13k_mutants_04_30_2024_combined_poolfile_annotated.csv'
df=pd.read_csv(poolfile1,sep='\t')

In [7]:
# Write the poolfile back out with to_csv's default (comma) separator under a new "_1" name.
# NOTE(review): index=None presumably behaves like index=False (no index column written);
# the documented boolean would be clearer -- confirm.
df.to_csv('/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/Batch_2_sequencing_Apr_26_2024/TNSeq_13k_mutants_mapping_output_without_vector_sequence_jumpstart/TNSeq_13k_mutants_mapping_output_jumpstart_polymerase_Apr_30_2024_2_fastq_combined/Tnseq_13k_mutants_04_30_2024_combined_poolfile_annotated_1.csv',index=None)

In [8]:
# Reload the converted "_1" poolfile with read_csv's default (comma) separator.
df=pd.read_csv('/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/Batch_2_sequencing_Apr_26_2024/TNSeq_13k_mutants_mapping_output_without_vector_sequence_jumpstart/TNSeq_13k_mutants_mapping_output_jumpstart_polymerase_Apr_30_2024_2_fastq_combined/Tnseq_13k_mutants_04_30_2024_combined_poolfile_annotated_1.csv')

In [9]:
# Settings shared by every sample/replicate; each one is copied into every metafile row.

# Where the metafile is written and where the counting script logs its status.
metafile_name = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/BarSeq_metafile_13k_mutants_04_30_2024.txt'
logfile_name = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/BarSeq_counting_13k_mutants_04_30_2024.log'
# TnSeq poolfile (the comma-separated "_1" copy) and the directory for BarSeq outputs.
poolfile = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/Batch_2_sequencing_Apr_26_2024/TNSeq_13k_mutants_mapping_output_without_vector_sequence_jumpstart/TNSeq_13k_mutants_mapping_output_jumpstart_polymerase_Apr_30_2024_2_fastq_combined/Tnseq_13k_mutants_04_30_2024_combined_poolfile_annotated_1.csv'
output_dir = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/TNSeq_13k_mutants_mapping_output/TNSeq_barseq_outputs_using_combined_tnseq_poolfile_04_30_2024'
# Sequences flanking the barcode, given as they appear in the reads (bottom strand).
# The current values are the reverse complements of the previous ones, with the
# before/after roles swapped.
before_barcode = 'GTCGACCTGCAGCGTACG' # bottom strand sequence was used
# previous value: before_barcode = 'GATGTCCACGAGGTCTCT'
after_barcode =  'AGAGACCTCGTGGACATC' # bottom strand sequence was used
# previous value: after_barcode = 'CGTACGCTGCAGGTCGAC'
# Barcode lengths to search for, expected length (20) first.
barcode_lengths = '20,19,18,21'

In [10]:
# Sample names whose barcodes were already counted in an earlier run of the
# counting script; add those names here. An empty list means none.
precounted_samples = []

In [11]:
# Column order of the tab-separated BarSeq metafile (one row per sample).
metafile_columns = ['FileIndex','SampleName', 'Fastq', 'DualIndex', 'Poolfile', 'OutputDir', 'minRandom', 'maxRandom',
                   'BeforeBarcode', 'AfterBarcode', 'BarcodeLengths', 'UsePrecounted']

# A precounted sample still needs its FASTQ / index / offset from sample_info.
# Fail before touching the metafile if a listed name has no such entry, rather than
# silently dropping it.
known_sample_names = {name for name, _, _, _ in sample_info}
unknown_precounted = [name for name in precounted_samples if name not in known_sample_names]
if unknown_precounted:
    raise ValueError('precounted_samples not found in sample_info: ' + ', '.join(unknown_precounted))

with open(metafile_name, 'w') as f:

    # header line
    f.write('\t'.join(metafile_columns)+'\n')

    # One row per sample. Previously a second loop over precounted_samples rebuilt a
    # row from variables left over from this loop and never wrote it, so no sample
    # could be flagged as precounted. The flag is now set here by membership.
    for sample_name, fastq, dual_index, offset in sample_info:

        use_precounted = 'True' if sample_name in precounted_samples else 'False'

        # Dual indices are backwards from what the flowcell output says: the index is
        # written reversed (not reverse-complemented), matching how it appears in the reads.
        # NOTE(review): the reversal is applied to precounted samples too -- confirm.
        # str(offset) lets offsets be given as ints or strings in sample_info.
        to_write = '\t'.join([sample_name, sample_name, fastq, dual_index[::-1], poolfile, output_dir,
                              str(offset), str(offset),
                              before_barcode, after_barcode, barcode_lengths, use_precounted])
        f.write(to_write+'\n')




### Check metafile

In [12]:
# Read the metafile back and display it to check that every sample row looks right.
pd.read_csv(metafile_name, sep='\t')

Unnamed: 0,FileIndex,SampleName,Fastq,DualIndex,Poolfile,OutputDir,minRandom,maxRandom,BeforeBarcode,AfterBarcode,BarcodeLengths,UsePrecounted
0,KFRB007_1,KFRB007_1,/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/BarSeq_FASTQs_13k_mutants_03_27_2024/KFRB007_S1_L001_R1_001.fastq,GCACTA,/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/Batch_2_sequencing_Apr_26_2024/TNSeq_13k_mutants_mapping_output_without_vector_sequence_jumpstart/TNSeq_13k_mutants_mapping_output_jumpstart_polymerase_Apr_30_2024_2_fastq_combined/Tnseq_13k_mutants_04_30_2024_combined_poolfile_annotated_1.csv,/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/TNSeq_13k_mutants_mapping_output/TNSeq_barseq_outputs_using_combined_tnseq_poolfile_04_30_2024,1,1,GTCGACCTGCAGCGTACG,AGAGACCTCGTGGACATC,20191821,False
1,KFRB008_1,KFRB008_1,/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/BarSeq_FASTQs_13k_mutants_03_27_2024/KFRB008_S1_L001_R1_001.fastq,TGTAGC,/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/Batch_2_sequencing_Apr_26_2024/TNSeq_13k_mutants_mapping_output_without_vector_sequence_jumpstart/TNSeq_13k_mutants_mapping_output_jumpstart_polymerase_Apr_30_2024_2_fastq_combined/Tnseq_13k_mutants_04_30_2024_combined_poolfile_annotated_1.csv,/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/TNSeq_13k_mutants_mapping_output/TNSeq_barseq_outputs_using_combined_tnseq_poolfile_04_30_2024,2,2,GTCGACCTGCAGCGTACG,AGAGACCTCGTGGACATC,20191821,False


## Barseq counting

Might take a few hours!

In [13]:
# Run the barcode-counting script: -m is the metafile written above, -l the log file
# that receives status updates.
!python3 /usr2/people/clairedubin/barseq/latest_pipeline/RBseq_Count_BarCodes_v2.py -m {metafile_name} -l {logfile_name}

2024-04-30 12:42:12 RBseq_Count_BarCodes.py
2024-04-30 12:42:12 Version: 1.1.4
2024-04-30 12:42:12 Release Date: July 16, 2020
2024-04-30 12:42:12 Options passed:  metafile:/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/BarSeq_metafile_13k_mutants_04_30_2024.txt  logFile:/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/BarSeq_counting_13k_mutants_04_30_2024.log  minQual:10  matchBefore:6  matchAfter:6  quietMode:False 
2024-04-30 12:42:12 Logging status updates in /usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/BarSeq_counting_13k_mutants_04_30_2024.log
2024-04-30 12:42:12 Loading TnSeq library metadata from /usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/BarSeq_metafile_13k_mutants_04_30_2024.txt
2024-04-30 12:42:12 Setting output directory as: /usr2/people/shollyt22/s

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Columns of the count table to compare against each other.
x_column = 'KFRB007_1'
y_column = 'KFRB008_1'

# Tab-separated count table produced by the BarSeq counting step.
# NOTE(review): this path is under TNSeq_barseq_outputs_03_27_2024, not the output_dir
# configured earlier in this notebook -- confirm it is the intended run.
df = pd.read_csv(
    '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/TNSeq_13k_mutants_mapping_output/TNSeq_barseq_outputs_03_27_2024/poolCount.txt',
    sep='\t',
)

# Scatter of one sample's counts against the other's, one point per table row.
# The figure is only displayed here, not saved.
fig, ax = plt.subplots()
ax.scatter(df[x_column], df[y_column], color='black')
ax.set_xlabel('KFRB007_1 Abundance', fontsize=14)
ax.set_ylabel('KFRB008_1 Abundance', fontsize=14)
ax.set_title('Plot of TN_insertion abundances at 37 oC', fontsize=16, color='purple')
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Columns of the count table to compare against each other.
x_column = 'KFRB007_1'
y_column = 'KFRB008_1'

# Tab-separated count table produced by the BarSeq counting step.
# NOTE(review): this path is under TNSeq_barseq_outputs_03_27_2024, not the output_dir
# configured earlier in this notebook -- confirm it is the intended run.
df = pd.read_csv(
    '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Sequencing_with_the_13k_mutants/for_analysis/TNSeq_13k_mutants_mapping_output/TNSeq_barseq_outputs_03_27_2024/poolCount.txt',
    sep='\t',
)

# Same scatter as the full-range plot, zoomed to counts between 0 and 10000 on both axes.
# The figure is only displayed here, not saved.
fig, ax = plt.subplots()
ax.scatter(df[x_column], df[y_column], color='black')
ax.set_xlabel('KFRB007_1 Abundance', fontsize=14)
ax.set_ylabel('KFRB008_1 Abundance', fontsize=14)
ax.set_title('Plot of TN_insertion abundances at 37 oC', fontsize=16, color='purple')
ax.set_xlim(0, 10000)
ax.set_ylim(0, 10000)
plt.show()


In [None]:
# Tab-separated count table from the temperature-study BarSeq run.
df = pd.read_csv('/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/poolCount.txt', sep='\t')

# Columns of the count table to compare against each other.
x_column = 'OORB0012_1'
y_column = 'OORB0013_1'

# Scatter of one sample's counts against the other's, one point per table row.
# Axis labels are derived from the column names so they cannot drift from the data
# being plotted (they previously read 'OORB012_1' / 'OORB013_1').
fig, ax = plt.subplots()
ax.scatter(df[x_column], df[y_column], color='black')
ax.set_xlabel(f'{x_column} Abundance', fontsize=14)
ax.set_ylabel(f'{y_column} Abundance', fontsize=14)
ax.set_title('Plot of TN_insertion abundances (45 oC)', fontsize=16, color='purple')
# The figure is only displayed here, not saved.
plt.show()

In [None]:
# Tab-separated count table from the temperature-study BarSeq run.
df = pd.read_csv('/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/poolCount.txt', sep='\t')

# Columns of the count table to compare against each other.
x_column = 'OORB0012_1'
y_column = 'OORB0013_1'

# Scatter of one sample's counts against the other's, one point per table row.
# Axis labels are derived from the column names so they cannot drift from the data
# being plotted (they previously read 'OORB012_1' / 'OORB013_1').
fig, ax = plt.subplots()
ax.scatter(df[x_column], df[y_column], color='black')
ax.set_xlabel(f'{x_column} Abundance', fontsize=14)
ax.set_ylabel(f'{y_column} Abundance', fontsize=14)
ax.set_title('Plot of TN_insertion abundances_45 oC', fontsize=16, color='purple')

# Zoom both axes to counts between 0 and 6000.
ax.set_xlim(0, 6000)
ax.set_ylim(0, 6000)

# Saved relative to the notebook's working directory.
fig.savefig("fitness_of_TN_abundances_03_01_2024_45oC_1a.png", dpi=600, bbox_inches='tight')
plt.show()

## to normalize the Barseq pool count file based on the number of reads obtained

#### normalization based on individual sample

In [None]:
import pandas as pd

# Read the tab-separated BarSeq count table.
count_file_path = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/poolCount.txt'
df = pd.read_csv(count_file_path, sep='\t')

# For each sample: store the column total (repeated on every row) in
# Total_Count_<sample>, then divide the sample column by it.
# Note that the raw count columns are OVERWRITTEN by their fractions in this variant.
for sample in ('OORB0010_1', 'OORB0011_1', 'OORB0012_1', 'OORB0013_1'):
    total_column = f'Total_Count_{sample}'
    df[total_column] = df[sample].sum()
    df[sample] = df[sample] / df[total_column]

# Write the result as a comma-separated file without the index column.
normalized_count_file_path = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/normalized_barseq_pool_counts_01_03_2023.csv'
df.to_csv(normalized_count_file_path, index=False)


In [None]:
import pandas as pd

# Read the tab-separated BarSeq count table.
count_file_path = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/poolCount.txt'
df = pd.read_csv(count_file_path, sep='\t')

samples = ['OORB0010_1', 'OORB0011_1', 'OORB0012_1', 'OORB0013_1']

# First pass: column total of every sample, repeated on every row.
for sample in samples:
    df[f'Total_Count_{sample}'] = df[sample].sum()

# Second pass: fraction of the sample total, rounded to 3 decimals, in a new
# Normalized_<sample> column (raw counts are kept).
# NOTE(review): rounding to 3 decimals presumably turns most fractions into 0.0 --
# confirm whether this output is still needed.
for sample in samples:
    df[f'Normalized_{sample}'] = (df[sample] / df[f'Total_Count_{sample}']).round(3)

# Write the result as a comma-separated file without the index column.
normalized_count_file_path = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/normalized_barseq_pool_counts_01_03_2023_mod.csv'
df.to_csv(normalized_count_file_path, index=False)


### this is the normalization that worked with the right result

In [None]:
import pandas as pd

# Read the tab-separated BarSeq count table.
count_file_path = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/poolCount.txt'
df = pd.read_csv(count_file_path, sep='\t')

samples = ['OORB0010_1', 'OORB0011_1', 'OORB0012_1', 'OORB0013_1']

# First pass: column total of every sample, repeated on every row.
for sample in samples:
    df[f'Total_Count_{sample}'] = df[sample].sum()

# Second pass: unrounded fraction of the sample total in a new Normalized_<sample>
# column (raw counts are kept).
for sample in samples:
    df[f'Normalized_{sample}'] = df[sample] / df[f'Total_Count_{sample}']

# Write the result as a comma-separated file without the index column.
normalized_count_file_path = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/normalized_barseq_pool_counts_01_03_2023_mod1.csv'
df.to_csv(normalized_count_file_path, index=False)


##### To make the fitness abundance plots again using the normalized values

In [None]:
import pandas as pd

# Re-export the normalized counts: same table, tab-separated instead of comma-separated.
csv_file_path = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/normalized_barseq_pool_counts_01_03_2023_mod1.csv'
txt_file_path = '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/normalized_barseq_pool_counts.txt'

df = pd.read_csv(csv_file_path)
df.to_csv(txt_file_path, sep='\t', index=False)


In [None]:
# Columns of the normalized table to compare against each other.
x_column = 'Normalized_OORB0010_1'
y_column = 'Normalized_OORB0011_1'

# Tab-separated table of normalized counts.
df = pd.read_csv(
    '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/normalized_barseq_pool_counts.txt',
    sep='\t',
)

# Scatter of one sample's normalized values against the other's, one point per table row.
fig, ax = plt.subplots()
ax.scatter(df[x_column], df[y_column], color='black')
ax.set_xlabel('Normalized_OORB0010_1 Abundance', fontsize=14)
ax.set_ylabel('Normalized_OORB0011_1 Abundance', fontsize=14)
ax.set_title('Plot of TN_insertion abundances (37 oC)', fontsize=16, color='purple')

# Saved relative to the notebook's working directory.
fig.savefig("fitness_of_TN_abundances_01_03_2024_37oC_Normalized.png", dpi=600, bbox_inches='tight')
plt.show()

In [None]:
# Columns of the normalized table to compare against each other.
x_column = 'Normalized_OORB0012_1'
y_column = 'Normalized_OORB0013_1'

# Tab-separated table of normalized counts.
df = pd.read_csv(
    '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/normalized_barseq_pool_counts.txt',
    sep='\t',
)

# Scatter of one sample's normalized values against the other's, one point per table row.
fig, ax = plt.subplots()
ax.scatter(df[x_column], df[y_column], color='black')
ax.set_xlabel('Normalized_OORB0012_1 Abundance', fontsize=14)
ax.set_ylabel('Normalized_OORB0013_1 Abundance', fontsize=14)
ax.set_title('Plot of TN_insertion abundances (45 oC)', fontsize=16, color='purple')

# Saved relative to the notebook's working directory.
fig.savefig("fitness_of_TN_abundances_01_03_2024_45oC_Normalized.png", dpi=600, bbox_inches='tight')
plt.show()

In [None]:
# Columns of the normalized table to compare against each other.
x_column = 'Normalized_OORB0010_1'
y_column = 'Normalized_OORB0011_1'

# Tab-separated table of normalized counts.
df = pd.read_csv(
    '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/normalized_barseq_pool_counts.txt',
    sep='\t',
)

# Scatter of one sample's normalized values against the other's, one point per table row.
fig, ax = plt.subplots()
ax.scatter(df[x_column], df[y_column], color='black')
ax.set_xlabel('Normalized_OORB0010_1 Abundance', fontsize=14)
ax.set_ylabel('Normalized_OORB0011_1 Abundance', fontsize=14)
ax.set_title('Plot of TN_insertion abundances_37 oC_normalized', fontsize=16, color='purple')

# Zoom both axes to the 0 - 0.05 range.
ax.set_xlim(0, 0.05)
ax.set_ylim(0, 0.05)

fig.savefig("/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/histogram_plots/fitness_of_TN_abundances_01_09_2024_37oC_Normalized_1.png", dpi=600, bbox_inches='tight')
plt.show()

In [None]:
# Columns of the normalized table to compare against each other.
x_column = 'Normalized_OORB0012_1'
y_column = 'Normalized_OORB0013_1'

# Tab-separated table of normalized counts.
df = pd.read_csv(
    '/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/TNSeq_barseq_outputs_Temp2_12_31_2023/normalized_barseq_pool_counts.txt',
    sep='\t',
)

# Scatter of one sample's normalized values against the other's, one point per table row.
fig, ax = plt.subplots()
ax.scatter(df[x_column], df[y_column], color='black')
ax.set_xlabel('Normalized_OORB0012_1 Abundance', fontsize=14)
ax.set_ylabel('Normalized_OORB0013_1 Abundance', fontsize=14)
ax.set_title('Plot of TN_insertion abundances_45 oC_normalized', fontsize=16, color='purple')

# Zoom both axes to the 0 - 0.05 range.
ax.set_xlim(0, 0.05)
ax.set_ylim(0, 0.05)

fig.savefig("/usr2/people/shollyt22/shollyt22/TnSeq_BarSeq_sequencings/Barseq_Temp_study_11_04_2023/histogram_plots/fitness_of_TN_abundances_01_09_2024_45oC_Normalized_1.png", dpi=600, bbox_inches='tight')
plt.show()