In [54]:
import pandas as pd
import numpy as np
import datetime as dt

# Column names applied to the raw THS export, in file order.
cols = [
    "ID", "Location", "Region", "Denomination",
    "Date1", "Date2", "Number", "Notes", "Bibliography",
]

# Load the export, rename its columns, and index the rows by their ID
# (the 'ID' column itself is kept as a regular column too).
ths = pd.read_csv('THS.csv')
ths.columns = cols
ths.index = ths['ID']

def convert_denomination(df):
    """Return a copy of `df` with 'Denomination' letter codes spelled out in nummi.

    Only the 'Denomination' column is touched; values that have no entry in
    the lookup table below are left unchanged.
    """
    nummi_by_code = {
        'K': '20 nummi',
        'IS': '16 nummi',
        'M': '40 nummi',
        'B': '2 nummi',
        'A': '1 nummus',
        'H': '8 nummi',
        'I': '10 nummi',
        'E': '5 nummi',
        'D': '4 nummi',
    }
    return df.replace({'Denomination': nummi_by_code})

In [55]:
def setting_coin_finds(ths):
    """Build the coin_finds table: one single-find record per row of `ths`.

    Parameters
    ----------
    ths : pandas.DataFrame
        Must provide the columns 'Location', 'Region', 'Date1', 'Date2',
        'Number', 'Notes' and 'Bibliography'. Its index supplies the numeric
        part of each hoard id.

    Returns
    -------
    pandas.DataFrame
        Indexed like `ths`. 'year_found', 'year_end_found', 'lat', 'long' and
        'certainty' are left empty (NaN). 'place_small' and 'place_large' are
        not in the declared column list, so they end up as the last columns.
    """
    cols_finds = ['hoard_id', 'name', 'startDate', 'endDate', 'type_find', 'hoard?', 'excavation?', 'single?', 'num_coins', 'num_known_coins', 'year_found',
        'year_end_found', 'comments', 'bibliography', 'lat', 'long', 'certainty', 'owner', 'created', 'imported']
    coin_finds = pd.DataFrame(index=ths.index, columns=cols_finds)

    # .values strips the Series' own 0..n-1 index so the ids are assigned
    # positionally instead of being aligned against ths.index.
    ids = pd.Series(ths.index).apply(str)
    coin_finds['hoard_id'] = 'THS-' + (ids.values)
    coin_finds['name'] = ths['Location'] + ', ' + ths['Region'] + '(' + coin_finds['hoard_id'] + ')'
    coin_finds['place_small'] = ths['Location']
    coin_finds['place_large'] = ths['Region']
    coin_finds['startDate'] = ths['Date1']
    coin_finds['endDate'] = ths['Date2']

    # Every record is flagged as a single find.
    coin_finds['hoard?'] = 0
    coin_finds['excavation?'] = 0
    coin_finds['single?'] = 1
    coin_finds['type_find'] = 'single find'
    coin_finds['num_coins'] = ths['Number']
    coin_finds['num_known_coins'] = ths['Number']
    coin_finds['owner'] = 'Andrei Gandila'

    # Take the clock reading once: two separate now() calls give 'created'
    # and 'imported' values that differ by a few microseconds.
    import_time = pd.Timestamp.now()
    coin_finds['created'] = import_time
    coin_finds['imported'] = import_time

    coin_finds['comments'] = ths['Notes']
    coin_finds['bibliography'] = ths['Bibliography']

    return coin_finds

In [56]:
def setting_coin_groups(ths):
    """Build the coin_groups table: one coin group per row of `ths`.

    Parameters
    ----------
    ths : pandas.DataFrame
        Must provide the columns 'Date1', 'Date2', 'Denomination' and
        'Number'. Its index supplies the numeric part of each hoard id.

    Returns
    -------
    pandas.DataFrame
        Indexed like `ths`, with the columns listed in `cols` below.
    """
    cols = ['hoard_id', 'coin_group_id', 'start_year', 'end_year', 'revised_start', 'revised_end', 'ruler', 'revised_ruler',
        'denomination', 'num_coins', 'mint', 'imported', 'created', 'updated']
    coin_groups = pd.DataFrame(index=ths.index, columns=cols)

    # .values strips the Series' own 0..n-1 index so the ids are assigned
    # positionally instead of being aligned against ths.index.
    ids = pd.Series(ths.index).apply(str)
    coin_groups['hoard_id'] = 'THS-' + (ids.values)
    coin_groups['coin_group_id'] = coin_groups['hoard_id'] + '-1' # since these are all single finds
    coin_groups['start_year'] = ths['Date1']
    coin_groups['end_year'] = ths['Date2']

    # The revised_* columns start out as copies of the unrevised values.
    coin_groups['revised_start'] = coin_groups['start_year']
    coin_groups['revised_end'] = coin_groups['end_year']
    coin_groups['ruler'] = 'placeholder'				# 31.1 need to correct this
    coin_groups['revised_ruler'] = coin_groups['ruler']
    coin_groups['denomination'] = ths['Denomination']
    coin_groups['num_coins'] = ths['Number']
    coin_groups['mint'] = 'Thessaloniki'

    # Importing is a one-off event, so the three timestamps must be identical.
    # Read the clock once; three separate now() calls differ by microseconds.
    import_time = pd.Timestamp.now()
    coin_groups['imported'] = import_time
    coin_groups['created'] = import_time
    coin_groups['updated'] = import_time

    return coin_groups

In [57]:
# Spell out the denomination codes first, so both tables below are derived
# from the converted frame.
ths = convert_denomination(ths)

# Build the two output tables from the same input frame.
coin_finds = setting_coin_finds(ths)
coin_groups = setting_coin_groups(ths)

In [None]:
import requests, json

def get_coordinates(place_name, api_key=None):
    """Geocode `place_name` with the Google Geocoding API.

    Parameters
    ----------
    place_name : str
        Free-text place name to look up.
    api_key : str, optional
        Google API key. When omitted, the GOOGLE_MAPS_API_KEY environment
        variable is used, so the credential never has to live in the notebook.

    Returns
    -------
    list or bool
        ``[lat, lng]`` of the first result, or ``False`` when the service
        returns no results.

    Raises
    ------
    RuntimeError
        If no API key is supplied and the environment variable is unset.
    requests.HTTPError
        If the service answers with an HTTP error status.
    """
    import os  # local import: keeps this cell runnable on its own

    key = api_key or os.environ.get('GOOGLE_MAPS_API_KEY')
    if not key:
        raise RuntimeError(
            'No Google API key available: pass api_key= or set the '
            'GOOGLE_MAPS_API_KEY environment variable.'
        )

    # Let requests build the query string so spaces, diacritics and '&' in
    # place names are URL-encoded instead of corrupting the request.
    r = requests.get(
        'https://maps.googleapis.com/maps/api/geocode/json',
        params={'address': place_name, 'key': key},
        timeout=10,  # do not hang the notebook on a stalled connection
    )
    r.raise_for_status()

    results = r.json().get('results', [])
    if not results:
        return False

    location = results[0]['geometry']['location']
    return [location['lat'], location['lng']]

In [None]:
# Load the cached place coordinates if the cache file exists; otherwise
# geocode every distinct place once and write the cache.
# Either way coordinates_df ends up indexed by place name ('Name') with
# numeric 'Lat' / 'Lng' columns (NaN where the lookup found nothing).
try:
    coordinates_df = pd.read_csv('coordinates.csv', index_col='Name')
    # Cache files may contain the literal text "False" for failed lookups;
    # coerce those (and anything else non-numeric) to NaN.
    for axis_col in ('Lat', 'Lng'):
        coordinates_df[axis_col] = pd.to_numeric(coordinates_df[axis_col], errors='coerce')

except FileNotFoundError:
    # Only a missing cache triggers a rebuild; any other error (bad CSV,
    # permissions, ...) should surface instead of being silently swallowed.
    place_names = coin_finds['place_small'].dropna().unique()

    # get_coordinates returns [lat, lng] on success and False on no result.
    lookups = [get_coordinates(place) for place in place_names]

    coordinates_df = pd.DataFrame(
        {
            'Lat': [hit[0] if hit else np.nan for hit in lookups],
            'Lng': [hit[1] if hit else np.nan for hit in lookups],
        },
        index=pd.Index(place_names, name='Name'),
    )

    coordinates_df.to_csv('coordinates.csv')

In [None]:
# Copy the geocoded coordinates onto coin_finds by matching place names.
# coordinates_df may carry the place name either as a 'Name' column (as read
# back from the CSV without an index column) or as its index (as built
# fresh), so normalise to a name-indexed frame first.
if 'Name' in coordinates_df.columns:
    coords_by_place = coordinates_df.set_index('Name')
else:
    coords_by_place = coordinates_df

# Series.map needs unique keys; if a name is repeated, the last row wins.
coords_by_place = coords_by_place[~coords_by_place.index.duplicated(keep='last')]

coin_finds['lat'] = coin_finds['place_small'].map(coords_by_place['Lat'])
coin_finds['long'] = coin_finds['place_small'].map(coords_by_place['Lng'])

coin_finds

In [None]:
# Display the coin_groups table for a visual check.
coin_groups

In [78]:
# Read the bibliography file into a list of lines, each stripped of
# leading/trailing whitespace (including the newline).
# NOTE(review): open() is called without an encoding, so the platform default
# applies — confirm it matches the file, which appears to contain non-ASCII
# author names.
with open('ths-bib.txt') as f:
    content = [raw_line.strip() for raw_line in f.readlines()]

In [80]:
import re
# Matches a four-digit year, optionally followed by "-YYYY".
# Group 1 is the (first) year; group 2 is the end year of a range, or None.
# The optional suffix replaces the alternation `(YYYY)|(YYYY-YYYY)`, whose
# first branch always won, so ranges were truncated to their first year.
regex = r'([0-9]{4})(?:-([0-9]{4}))?'

# gets all the required information from the bibliography
def get_info_from_bib(text_line):
    """Split one bibliography line into its parts.

    The line is expected to look like
    ``<short author ...> <year or year-range> = <full reference>``.

    Parameters
    ----------
    text_line : str
        One line of the bibliography file.

    Returns
    -------
    list
        ``[author, years, long_author, reference]`` where
        * author      – text before the first space (the whole line if it
                        has no space),
        * years       – the first ``XXXX`` or ``XXXX-YYYY`` found,
        * long_author – everything before the year, minus the separating
                        character,
        * reference   – text following the first " =" and one separator
                        character after it; empty if the line has no " =".

    Raises
    ------
    ValueError
        If the line contains no four-digit year.
    """
    # partition never yields a -1 offset, unlike find() + slicing, which
    # would chop the last character off a line without spaces.
    author = text_line.partition(" ")[0]

    temp = re.search(regex, text_line)
    if temp is None:
        raise ValueError('no four-digit year found in bibliography line: %r' % (text_line,))
    years = temp.group() # covers the years in XXXX or XXXX-YYYY format

    # Drop the single character separating the author from the year; clamp
    # at 0 so a line that starts with the year gives '' rather than
    # text_line[:-1].
    year_start = temp.start()
    long_author = text_line[:max(year_start - 1, 0)]

    # Skip the three characters of the " = " separator.
    equal_loc = text_line.find(" =")
    reference = text_line[equal_loc + 3:] if equal_loc != -1 else ""

    return [author, years, long_author, reference]

#tested and this works
#for i in content[:20]:
#    res = get_info_from_bib(i)
#    print(res)

# Parse every bibliography line into [author, year, long author, reference]
# and collect the results in a DataFrame, one row per line.
biblio = [get_info_from_bib(entry) for entry in content]

bibliography = pd.DataFrame(
    biblio,
    columns=['Author', 'Year', 'Long_Author', 'Reference'],
)
bibliography.tail(30)

Unnamed: 0,Author,Year,Long_Author,Reference
320,Reisner,1924,Reisner et alii,"G. A. Reisner, C. S. Fisher & D. G. Lyon, Harv..."
321,Ristov,2015,Ristov,"K. Ristov, Gradishte Taor: Late Antique settle..."
322,Rusev,2015,Rusev et alii,"N. Rusev, D. Dragoev, V. Varbanov & D. Pachev,..."
323,Sancaktar,2014,Sancaktar,"H. Sancaktar, Antiokheia (Pisidia) kazı sikkel..."
324,Schönert-Geiss,1991,Schönert-Geiss,"E. Schönert-Geiss, Die Fundmünzen, in Iatrus-K..."
325,Somogyi,1997,Somogyi,"P. Somogyi, Byzantinische Fundmünzen der Aware..."
326,Somogyi,2007,Somogyi,"P. Somogyi, Byzantinische Fundmünzen der Aware..."
327,Stikas,1966,Stikas,"E. Stikas, Anaskaphe Amphipoleos, Praktika tes..."
328,Stikas,1970,Stikas,"E. Stikas, Anaskaphe palaiochristianikon basil..."
329,Stolyarik,1993,Stolyarik,"E. Stolyarik, Essays on monetary circulation i..."


In [59]:
# Preview the first three rows of coin_finds.
coin_finds.head(3)

Unnamed: 0_level_0,hoard_id,name,startDate,endDate,type_find,hoard?,excavation?,single?,num_coins,num_known_coins,...,comments,bibliography,lat,long,certainty,owner,created,imported,place_small,place_large
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,THS-1,"Adamclisi, Thracia (QE)(THS-1)",572,573,single find,0,0,1,1,1,...,,"Vertan, Custurea, Pontica 14, 1981, 341",,,,Andrei Gandila,2018-02-06 19:19:38.395285,2018-02-06 19:19:38.396691,Adamclisi,Thracia (QE)
2,THS-2,"Enisala, Thracia (QE)(THS-2)",574,575,single find,0,0,1,1,1,...,,"Vertan, Custurea, Pontica 19, 1986, 300, no. 1084",,,,Andrei Gandila,2018-02-06 19:19:38.395285,2018-02-06 19:19:38.396691,Enisala,Thracia (QE)
3,THS-3,"Harsova, Thracia (QE)(THS-3)",566,567,single find,0,0,1,1,1,...,,"Vertan, Custurea, Pontica 19, 1986, 301, no. 1091",,,,Andrei Gandila,2018-02-06 19:19:38.395285,2018-02-06 19:19:38.396691,Harsova,Thracia (QE)


In [65]:
# this gets all the required information from an entry in the database
def get_info_from_db(text_line):
    """Extract the author and year from one database bibliography entry.

    Parameters
    ----------
    text_line : str
        A bibliography string such as
        "Vertan, Custurea, Pontica 14, 1981, 341".

    Returns
    -------
    list
        ``[author, years]`` where `author` is the text before the first
        comma (the whole string if there is no comma) and `years` is the
        first match of the module-level `regex` pattern.

    Raises
    ------
    ValueError
        If no year is found in `text_line`.
    """
    # partition never yields a -1 offset, unlike find() + slicing, which
    # would chop the last character off an entry without a comma.
    author = text_line.partition(",")[0]

    temp = re.search(regex, text_line)
    if temp is None:
        raise ValueError('no year found in database entry: %r' % (text_line,))
    years = temp.group() # covers the years in XXXX or XXXX-YYYY format

    return [author, years]

# tested and this works
#test_list = list(coin_finds['bibliography'].head(50))
#for i in test_list:
#    print(get_info_from_db(i))