In [1]:
import os
import re
from ftplib import FTP
from ftplib import error_perm

import MySQLdb as mariadb


In [2]:
def checkExistingData(db_name, user='root', passwd=''):
    """Return the set of experiment IDs already stored in the station tables.

    Runs ``SELECT ExpID FROM <table>`` against each of the tables ``Ke``, ``Yg``,
    ``Hb`` and ``Ho`` of the given database and merges everything that comes back.

    Args:
        db_name: name of the auscope database (as a string) we want to query for
            unique existing experiment IDs.
        user: database user to connect as (defaults to the previously hard-coded 'root').
        passwd: password for ``user`` (defaults to the previously hard-coded empty string).
            Prefer passing real credentials in from the environment over editing the defaults.

    Returns:
        A set containing every value returned by the four queries.
    """
    # Table names are fixed here (not user input), so concatenating them into the query is safe.
    station_key = ['Ke', 'Yg', 'Hb', 'Ho']
    unique_existing_experiments = set()

    conn = mariadb.connect(user=user, passwd=passwd, db=db_name)
    try:
        cursor = conn.cursor()
        try:
            for ant in station_key:
                cursor.execute("SELECT ExpID FROM " + ant)
                for row in cursor.fetchall():
                    # Each row is a sequence of column values; add all of them.
                    unique_existing_experiments.update(row)
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when a query fails.
        conn.close()

    return unique_existing_experiments

In [3]:
print(checkExistingData('auscope_test'))

{'R1892', 'R4876', 'R1876'}


In [4]:
# Build the list of experiments from the given year's master schedule that involve one of
#  our stations and do not exist yet in the database.
with open('master16.txt') as schedule_file:
    contents = schedule_file.readlines()

# A station code only counts when it is not directly preceded by '-'.
regex = '(?<!-)Ke|(?<!-)Yg|(?<!-)Hb|(?<!-)Ho'
station_pattern = re.compile(regex, re.MULTILINE)

# Rows are '|'-separated; only rows with more than 13 fields are considered.
# Field 7 is searched for the station codes and field 2 is kept as the experiment code.
split_rows = (raw_row.split('|') for raw_row in contents)
valid_experiment = [
    fields[2].strip()
    for fields in split_rows
    if len(fields) > 13 and station_pattern.search(fields[7])
]

existing_experiments = checkExistingData('auscope_test')
experiments_to_download = [
    code for code in valid_experiment if code not in existing_experiments
]


In [5]:
experiments_to_download

['AUG020',
 'R1722',
 'CRF92',
 'R4722',
 'R1723',
 'RD1601',
 'R4723',
 'R1724',
 'CRDS81',
 'R4724',
 'R1725',
 'AOV007',
 'R4725',
 'R1726',
 'OHG100',
 'OHG101',
 'R4726',
 'R1727',
 'T2109',
 'OHG102',
 'R4727',
 'R1728',
 'AUA009',
 'CRF93',
 'R4728',
 'R1729',
 'R4729',
 'R1730',
 'R4730',
 'R1731',
 'AUG021',
 'AOV008',
 'R4731',
 'R1732',
 'R1733',
 'RD1602',
 'R4733',
 'AUG022',
 'AUG023',
 'T2110',
 'CRDS82',
 'R4734',
 'R1735',
 'AUA010',
 'R4735',
 'R1736',
 'R4736',
 'R1737',
 'R4737',
 'R1738',
 'R4738',
 'R1739',
 'AOV009',
 'R4739',
 'CRDS83',
 'R1740',
 'RD1604',
 'R4740',
 'R1741',
 'R4741',
 'R1742',
 'R4742',
 'R1743',
 'AUG024',
 'R4743',
 'R1744',
 'AUG025',
 'R4744',
 'R1745',
 'T2111',
 'R4745',
 'R1746',
 'RD1605',
 'CRF95',
 'R4746',
 'R1747',
 'R4747',
 'R1748',
 'AUA011',
 'AUG026',
 'R4748',
 'R1749',
 'RD1606',
 'CRDS84',
 'R4749',
 'R1750',
 'APSG38',
 'AOV010',
 'R4750',
 'R1751',
 'R4751',
 'R1752',
 'AUA012',
 'R4752',
 'R1753',
 'R4753',
 'R1754',
 '

In [7]:
# This cell downloads the analysis report and spoolfile for every experiment in
#  experiments_to_download that does not yet have a local report.
# Please note, an experiment whose '<exp>_report.txt' already exists in ./analysis_reports is
#  skipped, so it will not get the most up-to-date files without adding more complex conditions.
# NOTE(review): confirm that this host still accepts anonymous plain-FTP logins.

year = '2016'
report_dir = os.path.join(os.getcwd(), 'analysis_reports')
os.makedirs(report_dir, exist_ok=True)  # open(..., "wb") below fails if the folder is missing

# The context manager closes the FTP session even when the loop is interrupted.
with FTP('cddis.gsfc.nasa.gov') as ftp:
    #print(ftp.getwelcome())
    ftp.login()

    for exp in experiments_to_download:
        exp = exp.lower()
        local_filename_report = os.path.join(report_dir, exp + '_report.txt')
        local_filename_spool = os.path.join(report_dir, exp + '_spoolfile.txt')
        if os.path.isfile(local_filename_report):
            continue

        try:
            ftp.cwd('/vlbi/ivsdata/aux/' + str(year) + '/' + exp)
        except error_perm:
            # The server has no directory for this experiment; carry on with the next one
            #  instead of aborting the whole run.
            continue

        options = ['ivs', 'IVS', 'usno', 'USNO']
        for spelling in options:
            filename_report = []
            filename_spool = []
            ftp.retrlines('LIST '+exp+'-'+spelling+'-analysis-report*', filename_report.append)
            ftp.retrlines('LIST '+exp+'-'+spelling+'-analysis-spoolfile*', filename_spool.append)
            if not filename_report:
                continue

            # (LIST line, local path) pairs; the last LIST entry of each kind is used.
            # The report is written last because its presence is what marks the experiment
            #  as done in the isfile() check above.
            wanted = []
            if filename_spool:
                # Index the spool listing by its own length (it may differ from the report's).
                wanted.append((filename_spool[-1], local_filename_spool))
            wanted.append((filename_report[-1], local_filename_report))

            for listing_line, local_path in wanted:
                remote_name = listing_line.split()[8]  # 9th column of the LIST line is the name
                try:
                    with open(local_path, "wb") as local_file:
                        ftp.retrbinary("RETR " + remote_name, local_file.write)
                except BaseException:
                    # Includes KeyboardInterrupt: drop the partial file so the next run
                    #  does not mistake it for a finished download, then re-raise.
                    if os.path.exists(local_path):
                        os.remove(local_path)
                    raise
            break
        

KeyboardInterrupt: 

In [30]:
os.getcwd()+'/analysis_reports/'+exp+'_report.txt'

'/home/tiege/Documents/research/auscope/metric_ingest/analysis_reports/r4876_report.txt'

In [46]:
experiments_to_download

['R1875',
 'R4875',
 'R4876',
 'R1877',
 'CRDS99',
 'AUA048',
 'R4877',
 'AOV031',
 'R1878',
 'R4878',
 'R1879',
 'T2130',
 'R4879',
 'R1880',
 'OHG117',
 'RD1902',
 'R4880',
 'R1881',
 'AOV032',
 'AUA049',
 'R4881',
 'CRD100',
 'R1882',
 'R1883',
 'R4883',
 'R1884',
 'R4884',
 'R1885',
 'AUA050',
 'R4885',
 'R1886',
 'APSG44',
 'AOV033',
 'R4886',
 'R1887',
 'T2131',
 'CRD101',
 'R4887',
 'R1888',
 'OHG118',
 'AOV034',
 'R4888',
 'R1889',
 'RD1903',
 'CRF110',
 'R4889',
 'AUA051',
 'R1890',
 'APSG45',
 'R4890',
 'R1891',
 'R4891',
 'R1892',
 'R4892',
 'CRD102',
 'R1893',
 'RD1904',
 'R4893',
 'R1894',
 'AOV035',
 'AUA052',
 'R4894',
 'R1895',
 'T2132',
 'R4895',
 'R1896',
 'R4896',
 'R1897',
 'OHG119',
 'R4897',
 'R1898',
 'R4898',
 'R1899',
 'AOV036',
 'AUA053',
 'R4899',
 'R1900',
 'CRD103',
 'RD1905',
 'R4900',
 'R1901',
 'CRF111',
 'R4901',
 'R1902',
 'AUA054',
 'RD1906',
 'R4902',
 'R1903',
 'CRF112',
 'AOV037',
 'R4903',
 'R1904',
 'T2133',
 'RD1907',
 'R4904',
 'R1905',
 'R4905

In [9]:
filename_report


[]

In [19]:
# Scratch check: open a fresh FTP session, log in without credentials and change into the
#  aux directory of experiment r4875 so that its listing can be inspected by hand.
# Relies on `year` having been set by a previously executed cell.
ftp = FTP('cddis.gsfc.nasa.gov')
#print(ftp.getwelcome())
ftp.login()
ftp.cwd('/vlbi/ivsdata/aux/'+str(year)+ '/' + 'r4875')
# Filled by the LIST call in the next cell.
filename_report = []

In [20]:
# Append every LIST line matching r4875's 'ivs' analysis report pattern to filename_report.
ftp.retrlines('LIST '+'r4875'+'-'+'ivs'+'-analysis-report*', filename_report.append)


'226 Directory send OK.'

In [26]:
len(filename_report) -1

1

In [9]:
# Scratch setup: a fresh FTP session (login without credentials) plus the experiment code
#  and year used by the following cells.
from ftplib import FTP
ftp = FTP('cddis.gsfc.nasa.gov')
ftp.login()
exp = 'crd103'
year = 2019

In [10]:
# Ask the server to LIST '<exp>.skd' directly under the year's aux directory and collect the
#  returned lines in filename_skd.
# NOTE(review): the download cell uses '/vlbi/ivsdata/aux/<year>/<exp>/' as the experiment
#  directory; this path has no '<exp>/' component, which may be why nothing is listed - confirm.
filename_skd = []
ftp.retrlines('LIST /vlbi/ivsdata/aux/'+str(year)+ '/' + exp +'.skd', filename_skd.append)

'226 Directory send OK.'

In [11]:
filename_skd

[]

In [13]:
# Scratch check: does a local report for r1900 already exist in the analysis_reports folder
#  under the current working directory?
import os

os.path.isfile(os.getcwd()+'/analysis_reports/'+'r1900'+'_report.txt')

False

In [14]:
def checkExistingData(db_name, user='auscope', passwd='password'):
    """Return the set of experiment IDs already stored in the station tables.

    Runs ``SELECT ExpID FROM <table>`` against each of the tables ``Ke``, ``Yg``,
    ``Hb`` and ``Ho`` of the given database and merges everything that comes back.

    Args:
        db_name: name of the auscope database (as a string) we want to query for
            unique existing experiment IDs.
        user: database user to connect as (defaults to the previously hard-coded 'auscope').
        passwd: password for ``user`` (defaults to the previously hard-coded 'password').
            Prefer passing real credentials in from the environment over editing the defaults.

    Returns:
        A set containing every value returned by the four queries.
    """
    # Table names are fixed here (not user input), so concatenating them into the query is safe.
    station_key = ['Ke', 'Yg', 'Hb', 'Ho']
    unique_existing_experiments = set()

    conn = mariadb.connect(user=user, passwd=passwd, db=db_name)
    try:
        cursor = conn.cursor()
        try:
            for ant in station_key:
                cursor.execute("SELECT ExpID FROM " + ant)
                for row in cursor.fetchall():
                    # Each row is a sequence of column values; add all of them.
                    unique_existing_experiments.update(row)
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when a query fails.
        conn.close()

    return unique_existing_experiments


In [15]:
def validExpFinder(master_schedule):
    """Return the experiment codes in a master schedule that involve our stations.

    The file is read line by line and each line is split on '|'. Lines with 13 or
    fewer fields are ignored. For the rest, field 7 is searched for one of the codes
    Ke, Yg, Hb or Ho that is not directly preceded by '-'; on a hit, field 2
    (stripped of surrounding whitespace) is collected.

    Args:
        master_schedule: path of the master schedule file (converted with ``str``).

    Returns:
        List of the collected experiment codes, in file order.
    """
    # NOTE(review): the look-behind only rejects a code that immediately follows '-'.
    #  If field 7 can list several codes after a single dash, the later ones would
    #  still match - confirm against the master schedule format.
    station_pattern = re.compile('(?<!-)Ke|(?<!-)Yg|(?<!-)Hb|(?<!-)Ho', re.MULTILINE)

    selected = []
    with open(str(master_schedule)) as handle:
        for raw_row in handle:
            fields = raw_row.split('|')
            if len(fields) <= 13:
                continue
            if station_pattern.search(fields[7]):
                selected.append(fields[2].strip())
    return selected

In [16]:
# File name of the master schedule to process, kept as a string.
# NOTE(review): presumably meant to be passed to validExpFinder - confirm.
schedule = 'master19.txt'

NameError: name 'master19' is not defined