In [89]:
import re
import numpy as np
import pandas as pd
import geopandas as gpd
import fiona
import folium
from folium.plugins import Geocoder
from transformers import pipeline
import spacy
import dateparser
from datetime import datetime

In [76]:
# Note - need to get spacy from hugging face
nlp = spacy.load("en_core_web_sm")

In [4]:
# Read in kml file of oldtime music gatherings
fiona.supported_drivers['KML'] = 'rw'
raw_points = gpd.read_file("./Old Time Open Jams.kml", driver="KML")
raw_points.head()

Unnamed: 0,Name,Description,geometry
0,"*Fairhope, AL",Jam n Folks meet every Tuesday (unless we are ...,POINT Z (-87.90006 30.52789 0.00000)
1,"*Anchorage, AK",Currently on hold<br>Irish session<br>Thursday...,POINT Z (-149.90654 61.19747 0.00000)
2,"*Ester, AK","The Golden Eagle Saloon<br>3630 Main St, Ester...",POINT Z (-148.01637 64.84680 0.00000)
3,"Fairbanks, AK",This jam is Zoom only until further notice. ...,POINT Z (-147.77080 64.83923 0.00000)
4,"*Fairbanks, AK",Irish Session ...,POINT Z (-147.71441 64.85332 0.00000)


In [5]:
raw_points.iloc[470,:]

Name                                               *Brooklyn, NY
Description    Dear Friends,<br>Like many people, due to the ...
geometry                      POINT Z (-74.0169398 40.6756909 0)
Name: 470, dtype: object

In [6]:
raw_points.to_csv("oldtime.csv")

In [35]:
raw_points[raw_points["Description"].str.contains("picnic", case=False)].iloc[7]["Description"]

"93rd Annual Old Fiddler's Picnic<br>A Celebration of Music ~ A Reunion of Friends<br>No date set yet for 2023.<br>Join hundreds of musicians and music lovers to celebrate the 93rd Annual Chester County Old Fiddler's Picnic.<br><br> Hibernia Park <br> Gates open at 8:30 AM<br> Stage registration begins at 9 AM<br> Free Admission and Free Parking<br><br>https://www.chesco.org/2415/Old-Fiddlers-Picnic<br><br>JS 8/24/22"

In [2]:
# Takes 2 min to download
classifier = pipeline("zero-shot-classification")
#classifier("We meet every two weeks", candidate_labels=["weekly gathering", "annual event"])

No model was supplied, defaulted to roberta-large-mnli and revision 130fb28 (https://huggingface.co/roberta-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.
Downloading (…)lve/main/config.json: 100%|██████████| 688/688 [00:00<00:00, 76.3kB/s]
Downloading tf_model.h5: 100%|██████████| 1.43G/1.43G [01:20<00:00, 17.8MB/s]
2023-03-21 20:46:34.350792: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint w

{'sequence': 'We meet every two weeks',
 'labels': ['weekly gathering', 'annual event'],
 'scores': [0.9827330112457275, 0.017267024144530296]}

In [14]:
test = "Fiddler's Picnic. 3 day event, first weekend in August.<br>Aug 4-6, 2023<br>https://www.facebook.com/Montana-Old-Time-Fiddlers-Picnic-403947783036154/?<br><br>Updated CR 1/2/23"
classifier(test, candidate_labels=["weekly gathering", "annual event"])

{'sequence': "Fiddler's Picnic. 3 day event, first weekend in August.<br>Aug 4-6, 2023<br>https://www.facebook.com/Montana-Old-Time-Fiddlers-Picnic-403947783036154/?<br><br>Updated CR 1/2/23",
 'labels': ['annual event', 'weekly gathering'],
 'scores': [0.5418637990951538, 0.4581362009048462]}

In [16]:
question_answerer = pipeline(task="question-answering")

No model was supplied, defaulted to distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.
Downloading (…)lve/main/config.json: 100%|██████████| 473/473 [00:00<00:00, 54.5kB/s]
Downloading tf_model.h5: 100%|██████████| 261M/261M [00:22<00:00, 11.8MB/s] 
Some layers from the model checkpoint at distilbert-base-cased-distilled-squad were not used when initializing TFDistilBertForQuestionAnswering: ['dropout_19']
- This IS expected if you are initializing TFDistilBertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a Bert

In [38]:
test = "Fiddler's Picnic. 3 day event, first weekend in August.<br>Aug 4-6, 2023<br>https://www.facebook.com/Montana-Old-Time-Fiddlers-Picnic-403947783036154/?<br><br>Updated CR 1/2/23"
test2 = "93rd Annual Old Fiddler's Picnic<br>A Celebration of Music ~ A Reunion of Friends<br>No date set yet for 2023.<br>Join hundreds of musicians and music lovers to celebrate the 93rd Annual Chester County Old Fiddler's Picnic.<br><br> Hibernia Park <br> Gates open at 8:30 AM<br> Stage registration begins at 9 AM<br> Free Admission and Free Parking<br><br>https://www.chesco.org/2415/Old-Fiddlers-Picnic<br><br>JS 8/24/22"
preds = question_answerer(
    question="What date(s) does this event take place in 2023?",
    context=test2
)
print(
    f"score: {round(preds['score'], 4)}, start: {preds['start']}, end: {preds['end']}, answer: {preds['answer']}"
)

score: 0.2999, start: 85, end: 92, answer: No date


In [58]:
oldtime_events["Description"].iloc[0]

'Jam n Folks meet every Tuesday (unless we are playing someplace) at the Nix Senior Center at 1 Bayou Drive ( a block and half east of North Section Street) in Fairhope. We practice from 9:00am to 12:00pm.<br><br>You can also chat with us at our Facebook page: https://www.facebook.com/groups/345412158921659/<br><br>Instrumentation in Jam n Folks includes mountain dulcimer, hammered dulcimer, mandolins, guitars, violins, viola, recorders, krumhorns, penny whistles, accordion and percussion as required. They have even let the webmaster of this site get away with bringing his soprano and baritone ukuleles. Grab your instrument of choice, come on over and play some tunes with us.\xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0JS 11/30/21'

In [147]:
ner = pipeline("ner", grouped_entities=True)

No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some layers from the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing TFBertForTokenClassification: ['dropout_147']
- This IS expected if you are initializing TFBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForTokenClassification were initialized 

In [148]:
doc = nlp(test)
print(test)
for ent in doc.ents:
    print(ent, ent.label_)

print(ner(test))

****ALL LEVELS - BLUEGRASS / OLD TIME COUNTRY/GOSPEL JAM****<br><br>Black Canyon City -High Desert Park<br>2nd Sunday's - Sept thru May<br>1pm-3:30-4pm<br>19001 Jacie Lane, Black Canyon City<br><br>Directions From Phoenix:<br>Take I-17 north and Exit at Rock Springs. Cross freeway to west side, turn north (rt) and follow to Jacie Ln (road at fire station on left), turn left and follow to top of hill. We'll be inside the only building up there. Dress light or in layers as it tends to get warm in there.<br><br>CR 11/2/
Black Canyon City -High Desert Park LOC
Sunday DATE
Jacie Lane PERSON
Black Canyon City LOC
I-17 PRODUCT
Exit at FAC
Rock Springs FAC
Jacie Ln PERSON
11/2/ CARDINAL
[{'entity_group': 'MISC', 'score': 0.37196904, 'word': 'B', 'start': 17, 'end': 18}, {'entity_group': 'LOC', 'score': 0.9785928, 'word': 'Black Canyon City', 'start': 68, 'end': 85}, {'entity_group': 'LOC', 'score': 0.9945238, 'word': 'High Desert Park', 'start': 87, 'end': 103}, {'entity_group': 'LOC', 'score'

In [184]:
# Filter out non-oldtime gatherings
oldtime_events = raw_points.copy()
oldtime_events = oldtime_events[~oldtime_events["Description"].str.contains("irish|celtic|scottish", case=False)]
print("Number of gatherings: %d" % oldtime_events.shape[0])

oldtime_events["clean_desc"] = oldtime_events["Description"].str.replace("(\d|\\|\/|-|\.)*$", "")

test = oldtime_events["clean_desc"].iloc[7]

def classify_event(text: str) -> tuple[str, float]:
    labels = ["weekly gathering", "annual event"]
    probs = classifier(text, candidate_labels=labels)['scores']
    clf = labels[np.argmax(probs)]
    p = probs[np.argmax(probs)]
    return clf, p

def extract_date(text: str) -> tuple[str, datetime, float]:
    preds = question_answerer(
        question="What are the days for this event?",
        context=text
    )
    score = round(preds['score'], 2)
    answer = preds['answer']
    # Remoe second date from date ranges (just want to know when it starts)
    single_date = re.sub("-\d*(?=\s|,)", "", answer)
    # Get datetime object from text representation
    date = dateparser.parse(date_string=single_date, languages=["en"], settings={'PREFER_DATES_FROM': 'future'})
    return (answer, date, score)
    #return (answer, score)

print(test)
print(classify_event(test))
print(extract_date(test))

#print("NAs in category row: %d" % sum(oldtime_events["category"].isna()))

Number of gatherings: 1156
Arizona Old Time Fiddlers<br>Tuesdays, 7-9pm<br>920 N Barkley, Mesa, AZ<br>Contact Crystal Shu 480-851-8362<br>crystalw86@hotmail.com<br>JS 11/30/


  oldtime_events["clean_desc"] = oldtime_events["Description"].str.replace("(\d|\\|\/|-|\.)*$", "")


('weekly gathering', 0.9308522343635559)
('Tuesdays, 7-9pm', None, 0.87)


In [182]:
#d = nlp("2nd Sunday's - Sept thru May")
d = nlp("every monday on the clock")
for ent in d.ents:
    print (ent, ent.label_)


In [None]:
# Create new folium map
m = folium.Map(location=[47.116386, -101.299591], zoom_start = 4)

# Create feature groups
jam_fg = folium.FeatureGroup(name="Jams")
festivals_fg = folium.FeatureGroup(name="Festivals / Camps")

# Create markers for each category and add to feature groups - color code
for i in range(oldtime_events.shape[0]):
    geom = oldtime_events["geometry"].iloc[i]
    name = oldtime_events["Name"].iloc[i]
    desc = oldtime_events["Description"].iloc[i]
    if(oldtime_events["category"].iloc[i] == "jam"):
        marker = folium.Marker(location=[geom.y, geom.x], popup=desc, icon=folium.Icon(color="green")).add_to(jam_fg)
    elif(oldtime_events["category"].iloc[i] == "festival_or_camp"):
        marker = folium.Marker(location=[geom.y, geom.x], popup=desc, icon=folium.Icon(color="red")).add_to(festivals_fg)

# Add feature group to map
jam_fg.add_to(m)
festivals_fg.add_to(m)

# Add layer control to map
m.add_child(folium.map.LayerControl())

In [None]:
Geocoder().add_to(m)
m

In [None]:
#m.save("Old Time Gatherings.html")