## Convert the .txt file to a .csv file

In [1]:
import csv

def process_input_file(input_file, output_file):
    """Convert a tab-separated ``KEY<TAB>VALUE`` drug listing into a CSV file.

    Every input line is split on tabs: the first part is taken as the field
    name and the second as its value (both stripped of surrounding
    whitespace). Lines with fewer than two tab-separated parts, and lines
    whose field name is not one of the known columns, are ignored.

    A record begins at each ``DRUG__ID`` line and extends until the next
    ``DRUG__ID`` line or the end of the file. Delimiting on ``DRUG__ID``
    (rather than waiting for a closing ``COMPCLAS`` line) keeps entries that
    lack ``COMPCLAS`` as rows of their own, instead of letting their fields
    bleed into the following entry or silently dropping a trailing entry.
    Known fields that appear before the first ``DRUG__ID`` line are written
    as a row with an empty ``DRUG__ID``.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the CSV file to create (overwritten if present).

    Returns:
        None. The result is written to ``output_file`` with a header row and
        one row per record; fields absent from a record are left empty.
    """
    # Column order of the output CSV; also the set of recognised field names.
    fieldnames = ['DRUG__ID', 'TRADNAME', 'DRUGCOMP', 'THERCLAS', 'DRUGTYPE', 'DRUGINCH', 'DRUGINKE', 'DRUGSMIL', 'HIGHSTAT', 'COMPCLAS']
    known_fields = set(fieldnames)

    data = []
    current_drug = {}

    # Stream the file line by line instead of loading it all into memory.
    with open(input_file, 'r') as f:
        for line in f:
            parts = line.split('\t')
            if len(parts) < 2:
                continue  # blank line or line without a tab-separated value
            key = parts[0].strip()
            if key not in known_fields:
                continue
            # A new DRUG__ID closes whatever record was being accumulated.
            if key == 'DRUG__ID' and current_drug:
                data.append(current_drug)
                current_drug = {}
            current_drug[key] = parts[1].strip()

    # The last record has no following DRUG__ID line to close it.
    if current_drug:
        data.append(current_drug)

    with open(output_file, 'w', newline='') as csvfile:
        # restval='' fills any field a record did not provide.
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, restval='')
        writer.writeheader()
        writer.writerows(data)


# Text dump to convert and the CSV to produce from it; both paths are
# relative, so they resolve against the notebook's working directory.
input_file = 'P1-02-TTD_drug_download_copy.txt'
output_file = 'P1-02-TTD_drug_download_copy.csv'
process_input_file(input_file=input_file, output_file=output_file)

In [2]:
import pandas as pd
# Load the CSV produced by the conversion step; a comma is already the
# default delimiter for pandas.read_csv, so it is not spelled out.
df_path = 'P1-02-TTD_drug_download_copy.csv'
df = pd.read_csv(filepath_or_buffer=df_path)

In [3]:
# Show the loaded frame as the cell's output (long frames are displayed
# truncated, with the middle rows elided).
df

Unnamed: 0,DRUG__ID,TRADNAME,DRUGCOMP,THERCLAS,DRUGTYPE,DRUGINCH,DRUGINKE,DRUGSMIL,HIGHSTAT,COMPCLAS
0,D00AIO,"""PassPort (insulin), Altea; Insulin (controlle...",UNIVERSITA' DEGLI STUDI DI MILANO-BICOCCA UNIV...,Anticancer Agents,Small molecular drug,"""1S/C18H14N2O/c1-21-13-8-6-12(7-9-13)14-10-11-...",PJJCQNCRSXRPLV-UHFFFAOYSA-N,COC1=CC=C(C=C1)C2=C3C4=CC=CC=C4NC3=NC=C2,Patented,Pyridoindole derivatives
1,D00BXK,,"""7TM PHARMA A/S RECEVEUR, Jean-Marie NIELSEN, ...",,Small molecular drug,"""1S/C24H18Cl2FN5O/c25-15-7-5-14(6-8-15)23-18(1...",QWOVGBHZFLBBHN-UHFFFAOYSA-N,C1=CC=C(C(=C1)N2C(=C(C(=N2)C(=O)NC3=CC=C(C=C3)...,Patented,Pyrazole derivatives
2,D00CRQ,,INST OF MATERIA MEDICA CHINESE ACADEMY OF MEDI...,,Small molecule immunotherapy,"""1S/C28H29BrClNO5/c1-18(2)11-12-35-25-14-26(23...",HXFYELRCMXNMTR-UHFFFAOYSA-N,CC(=CCOC1=CC(=C(C=C1CNC(CO)C(=O)O)Cl)OCC2=C(C(...,Patented,Phenylate derivatives
3,D00DDX,,BAYER PHARMA AKTIENGESELLSCHAFT,,Small molecular drug,"""1S/C19H16F4N4O3S/c1-30-15-7-12(20)3-4-13(15)1...",WUAZMKUDCHZMEP-UHFFFAOYSA-N,COC1=C(C=CC(=C1)F)C2=CC(=NC=N2)NC3=CC(=CC(=N3)...,Patented,N-(pyridin-2-yl)pyrimidin-4-amine derivatives
4,D00DIZ,,"""ARRAY BIOPHARMA INC. ALLEN, Shelley ANDREWS, ...",,Small molecular drug,"""1S/C23H26FN5O4/c24-15-8-14(9-18(10-15)33-13-1...",YDZSAJWQPHVDHL-UUSAFJCLSA-N,C1CC(N(C1)C2=NC3=C(C=NN3C=C2)C(=O)NC4CC4)C5=CC...,Patented,"""Pyrazolo[1,5-a]pyrimidine derivatives"""
...,...,...,...,...,...,...,...,...,...,...
1809,D0U8YP,,Isis Pharmaceuticals,,Small molecule immunotherapy,,,C1=CC=C2C(=C1)C(=C3C=CC(=O)C=C3)NN=C2NC4=CC(=C...,Investigative,Benzyl phenyl ether derivatives
1810,D0UJ9J,,Isis Pharmaceuticals,,Small molecule immunotherapy,,,,Investigative,Sulfamonomethoxine derivatives
1811,D0V4JV,,Isis Pharmaceuticals,,Small molecule immunotherapy,,,C1=CC(=CC=C1CCC2=CNC3=C2C(=O)N=C(N3)N)C(=O)NC(...,Investigative,Aromatic acetylene derivatives
1812,D0WX6J,,Isis Pharmaceuticals,Antiviral Agents,Peptide,,,CN1CCC2=C(C1)C3=NC(=O)C(=C3C=C2C4=CC=C(C=C4)S(...,Investigative,Peptidomimetics
