In [21]:
import re
from datetime import datetime
import MySQLdb as mariadb
import sys
import os

In [32]:
def problemFinder(text_section): # searches first section of text for a problem, creates two lists one with a boolean value, the other with at least 1 line of the string where a problem is mentioned
    stations = ["KATH12M", "YARRA12M", "HOBART12", "HOBART26"]
    problem_bool = []
    problem_string = []
    for ant in stations:
        regex = ant + ".*\n[\s]{11}.*|" + ant + ".*$"
        problem = re.findall(regex,text_section,re.MULTILINE)
        if len(problem) > 0:
            problem_bool.append(True)
            problem_string.append(problem[0].replace("\n", ""))
        else:
            problem_bool.append(False)
            problem_string.append("")
    return problem_bool, problem_string
    
def percent2decimal(percent_string):
    return float(percent_string.strip('%'))/100

def stationPerformance(text_section): # Extracts the percentage of useable scans for each station.
    stations = ["KATH12M", "YARRA12M", "HOBART12", "HOBART26"]
    station_performance = []
    for ant in stations:
        regex = ant + ".*"
        performance = re.findall(regex,text_section,re.MULTILINE)
        if len(performance) > 0:
            percentage = [s for s in performance[0].split() if '%' in s]
            performance = percent2decimal(percentage[0])
            station_performance.append(performance)
        else:
            station_performance.append(None)
    
    return station_performance
    
def metaData(text_section): # Extracts meta data from the analysis report file.
    date = re.findall("(?<=\$).{7}",text_section,re.MULTILINE)
    date = datetime.strptime(date[0], '%y%b%d').strftime('%Y-%m-%d')
    exp_code = re.findall("(?<=Analysis Report for\s)(.*?(?=\s))",text_section,re.MULTILINE)
    analyser = re.findall("\S.*(?=\sAnalysis Report for\s)",text_section,re.MULTILINE)
    return exp_code[0], analyser[0], date
    # pretty sure this doesn't work post-2099, but like... you shouldn't be using this trash then... right?
    
def stationPositions(text_section): # extracts station positons from the spoolfile
    stations = ["KATH12M", "YARRA12M", "HOBART12", "HOBART26"]
    station_positions = []
    for ant in stations:
        regex = ant + ".*[XYZ]\sComp.*"
        positions = re.findall(regex,text_section,re.MULTILINE)
        positions = [i.split()[5] for i in positions]
        station_positions.append(positions)
    for i in range(0, len(station_positions)):
        if station_positions[i] == []:
            station_positions[i] = ['NULL','NULL','NULL'] # this is a gross hacky way to deal with when a station exists in an analyis report but not the spool file.
    return station_positions


In [33]:
exp_code = 'r1892'
db_name = 'auscope_test'

In [38]:
file_report = 'analysis_reports/' + str(exp_code) + '_report.txt'
file_spool = 'analysis_reports/' + str(exp_code) + '_spoolfile.txt'
with open(file_report) as file:
    contents_report = file.read()
    sections = contents_report.split('-----------------------------------------')
with open(file_spool) as file:
    contents_spool = file.read()
    
meta = metaData(sections[0])
performance = stationPerformance(sections[2])
problems = problemFinder(sections[0])
position = stationPositions(contents_spool)

sql_command = []
station_id = ['Ke', 'Yg', 'Hb', 'Ho']
for i in range(0, len(performance)):
    if performance[i] == None:
        continue
    else:
        sql_station = "INSERT IGNORE INTO " + station_id[i] + " (ExpID, Performance, Date, Pos_X, Pos_Y, Pos_Z, Problem, Problem_String, Analyser) VALUES ('{}', {},'{}', {}, {}, {}, {}, '{}' , '{}');".format(meta[0], performance[i], meta[2], position[i][0], position[i][1], position[i][2], problems[0][i], problems[1][i], meta[1])
        sql_command.append(sql_station)
conn = mariadb.connect(user='auscope', passwd='password', db=str(db_name))
cursor = conn.cursor()
for i in range(0,len(sql_command)):
    cursor.execute(sql_command[i])
conn.commit()
conn.close() 

ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 's for met data. ' , 'IVS')' at line 1")

In [62]:
file_report = 'analysis_reports/' + str(exp_code) + '_report.txt'
file_spool = 'analysis_reports/' + str(exp_code) + '_spoolfile.txt'
with open(file_report) as file:
    contents_report = file.read()
    sections = contents_report.split('-----------------------------------------')
with open(file_spool) as file:
    contents_spool = file.read()
    
meta = metaData(sections[0])
performance = stationPerformance(sections[2])
problems = problemFinder(sections[0])
position = stationPositions(contents_spool)

sql_command = []
station_id = ["Ke", "Yg", "Hb", "Ho"]
for i in range(0, len(performance)):
    if performance[i] == None:
        continue
    else:
        sql_station = "INSERT IGNORE INTO {} (ExpID, Performance, Date, Pos_X, Pos_Y, Pos_Z, Problem, Problem_String, Analyser) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);".format(station_id[i])
        data = [meta[0], performance[i], meta[2], position[i][0], position[i][1], position[i][2], problems[0][i], problems[1][i], meta[1]]
        conn = mariadb.connect(user='auscope', passwd='password', db=str(db_name))
        cursor = conn.cursor()
        cursor.execute(sql_station, data)
        conn.commit()
conn.close() 

In [63]:
sql_station

'INSERT IGNORE INTO Yg (ExpID, Performance, Date, Pos_X, Pos_Y, Pos_Z, Problem, Problem_String, Analyser) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);'

In [42]:
def test(exp_code, db_name):
    file_report = 'analysis_reports/' + str(exp_code) + '_report.txt'
    file_spool = 'analysis_reports/' + str(exp_code) + '_spoolfile.txt'
    with open(file_report) as file:
        contents_report = file.read()
        sections = contents_report.split('-----------------------------------------')
    with open(file_spool) as file:
        contents_spool = file.read()
    
    meta = metaData(sections[0])
    performance = stationPerformance(sections[2])
    problems = problemFinder(sections[0])
    position = stationPositions(contents_spool)
    
    sql_command = []
    station_id = ['Ke', 'Yg', 'Hb', 'Ho']
    for i in range(0, len(performance)):
        if performance[i] == None:
            continue
        else:
            sql_station = "INSERT IGNORE INTO " + station_id[i] + " (ExpID, Performance, Date, Pos_X, Pos_Y, Pos_Z, Problem, Problem_String, Analyser) VALUES ('{}', {},'{}', {}, {}, {}, {}, '{}' , '{}');".format(meta[0], performance[i], meta[2], position[i][0], position[i][1], position[i][2], problems[0][i], problems[1][i], meta[1])
            sql_command.append(sql_station)
    conn = mariadb.connect(user='auscope', passwd='password', db=str(db_name))
    cursor = conn.cursor()
    for i in range(0,len(sql_command)):
        cursor.execute(sql_command[i])
    conn.commit()
    conn.close() 


In [43]:
test('aua048','auscope_test')

FileNotFoundError: [Errno 2] No such file or directory: 'analysis_reports/aua048_report.txt'

In [42]:
sql_station


'INSERT IGNORE INTO Ke (ExpID, Performance, Date, Pos_X, Pos_Y, Pos_Z, Problem, Problem_String, Analyser) VALUES (%s, %s,%s, %s, %s, %s, %s, %s , %s);'

In [19]:
position

[['', '', ''],
 ['-2388896314.52', '5043349951.73', '-3078590623.79'],
 ['', '', ''],
 ['', '', '']]

In [37]:
sql_command

["INSERT IGNORE INTO Ke (ExpID, Performance, Date, Pos_X, Pos_Y, Pos_Z, Problem, Problem_String, Analyser) VALUES (R1892, 0.764,2019-04-29, -4147354862.32, 4581542304.54, -1573302867.42, True, KATH12M log has all zero's for met data.  , IVS);",
 'INSERT IGNORE INTO Yg (ExpID, Performance, Date, Pos_X, Pos_Y, Pos_Z, Problem, Problem_String, Analyser) VALUES (R1892, 0.846,2019-04-29, -2388896423.75, 5043350053.29, -3078590544.58, True, YARRA12M: Clock breaks used at 00:36, 09:51 and 15:05. Three , IVS);']