### Imports

In [183]:
# !pip install -U spacy
# !pip install nltk
# !python3 -m spacy download en_core_web_sm
from pprint import pprint
import string
import numpy as np
p=string.punctuation

import nltk
import collections
from nltk.tokenize import wordpunct_tokenize
import jsonlines
import collections

In [184]:
import pandas as pd
import spacy
nlp=spacy.load('en_core_web_sm')

In [185]:
!ls ../data/ADE20K-pairs/captions/

ade20k_train_captions.jsonl	  train_captions      val_captions
ade20k_validation_captions.jsonl  train_captions.zip  val_captions.zip


In [186]:
train_dir = "../data/ADE20K-pairs/captions/ade20k_train_captions.jsonl"
val_dir = "../data/ADE20K-pairs/captions/ade20k_validation_captions.jsonl"

train_df = pd.read_json(train_dir,lines=True, orient="records")[['image_id','caption']]
train_df = train_df.drop_duplicates(subset="image_id", keep="first")
val_df = pd.read_json(val_dir,lines=True, orient="records")[['image_id','caption']]
val_df = val_df.drop_duplicates(subset="image_id", keep="first")
train_df.head()

Unnamed: 0,image_id,caption
0,ADE_train_00003661,In this picture I can see the inside view of a...
1,ADE_train_00003722,This is a picture taken inside of a room in th...
2,ADE_train_00013827,"In this image we can see monitors, keyboards, ..."
3,ADE_train_00009835,There is a building at the bottom of this imag...
4,ADE_train_00002932,At the bottom of the image there is a bed with...


In [34]:
val_df.head()

Unnamed: 0,image_id,caption
0,ADE_val_00001410,In this picture I can see the vehicles on the ...
1,ADE_val_00001113,In this image I can see water and I can also s...
2,ADE_val_00000424,"In this image we can see a table, light, book,..."
3,ADE_val_00001210,This image is taken outdoors. At the top of th...
4,ADE_val_00001480,"In this image, there are cupboards, stove, mic..."


In [35]:
train_df.shape, val_df.shape

((20210, 2), (2000, 2))

### Localized Narratives Descriptive Stats

In [50]:
lines = train_df.caption.to_list()

tokens = []
tokens.extend([w.lower() for line in lines for w in wordpunct_tokenize(line)])
print("Total tokens: ", len(tokens))
print("Total unique tokens: ", len(set(tokens))) #Total unique tokens

print("Avg. no. of tokens: ", \
      np.mean([len(wordpunct_tokenize(i)) for i in lines])) #Average number of tokens in the training set

print("Avg. no. of characters: ",\
      np.mean([len(i) for i in lines])) #Average sequence length



Total tokens:  983687
Total unique tokens:  3247
Avg. no. of tokens:  48.6732805541811
Avg. no. of characters:  211.35106382978722


In [54]:
!pwd

/project/dataset-exploration/clip-finetune-ade20k/notebooks


In [187]:
train = dict()
val = dict()

basepath = "../data/"

with jsonlines.open(basepath+'ade20k-train-combined.jsonl','r') as f:
    for row in f:
        train.update(row)        

with jsonlines.open(basepath+'ade20k-val-combined.jsonl','r') as f:
    for row in f:
        val.update(row)        
len(train.keys()), len(val.keys())



(20210, 2000)

In [188]:
type(train)

dict

In [189]:
train['ADE_train_00003661'].keys()

dict_keys(['scene-type-coarse', 'scene-type-fine', 'caption', 'objects', 'qa-pairs', 'qa-binary'])

In [117]:
coarse_ = []

for k, v in train.items():
    coarse_.append(v['scene-type-coarse'])
    
print("scene-type distribution - train: ", collections.Counter(coarse_))

coarse_ = []
for k, v in val.items():
    coarse_.append(v['scene-type-coarse'])
    
print("scene-type distribution - val: ", collections.Counter(coarse_))

scene-type distribution - train:  Counter({'urban': 5801, 'home or hotel': 5219, 'unclassified': 2294, 'nature landscape': 1725, 'work place': 1189, 'sports and leisure': 1183, 'cultural': 1005, 'shopping and dining': 878, 'transportation': 653, 'industrial': 263})
scene-type distribution - val:  Counter({'urban': 565, 'home or hotel': 534, 'unclassified': 223, 'nature landscape': 150, 'work place': 127, 'sports and leisure': 121, 'cultural': 102, 'shopping and dining': 89, 'transportation': 62, 'industrial': 27})


In [315]:
train_img_embedding_pairs = np.load("../outputs/clip-embeddings/finetuned-img-embeddings-train.npy", allow_pickle=True)
train_img_embedding_pairs = train_img_embedding_pairs.item()

val_img_embedding_pairs = np.load("../outputs/clip-embeddings/finetuned-img-embeddings-val.npy", allow_pickle=True)
val_img_embedding_pairs = val_img_embedding_pairs.item()

In [172]:
import umap

In [316]:
ids, embs = list(train_img_embedding_pairs.keys()), list(train_img_embedding_pairs.values())

In [179]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(style='white', context='poster')

In [317]:
coarse_labels = []
for i in ids:
    coarse_labels.append(train[i]['scene-type-coarse'])

In [318]:
umap_model = umap.UMAP(n_neighbors=20, 
                  n_components=2, 
                  min_dist=0.05, 
                  metric='cosine',
                  densmap=False,
                  verbose=True,
                 )

In [322]:
data = np.array(np.vstack([embs]), dtype=np.float64)
target = np.hstack([coarse_labels])
data.shape, target.shape

embedding = umap_model.fit_transform(data, y=target)

UMAP(angular_rp_forest=True, dens_frac=0.0, dens_lambda=0.0, metric='cosine',
     min_dist=0.05, n_neighbors=20, verbose=True)
Construct fuzzy simplicial set
Sun Sep  5 22:21:19 2021 Finding Nearest Neighbors
Sun Sep  5 22:21:19 2021 Building RP forest with 12 trees
Sun Sep  5 22:21:19 2021 NN descent for 14 iterations
	 1  /  14
	 2  /  14
	 3  /  14
	 4  /  14
	Stopping threshold met -- exiting after 4 iterations
Sun Sep  5 22:21:21 2021 Finished Nearest Neighbor Search



Beginning in version 0.22, arrays of bytes/strings will be converted to decimal numbers if dtype='numeric'. It is recommended that you convert the array to a float dtype before using it in scikit-learn, for example by using your_array = your_array.astype(np.float64).



Sun Sep  5 22:21:22 2021 Construct embedding
	completed  0  /  200 epochs
	completed  20  /  200 epochs
	completed  40  /  200 epochs
	completed  60  /  200 epochs
	completed  80  /  200 epochs
	completed  100  /  200 epochs
	completed  120  /  200 epochs
	completed  140  /  200 epochs
	completed  160  /  200 epochs
	completed  180  /  200 epochs
Sun Sep  5 22:21:35 2021 Finished embedding


In [323]:
from umap import UMAP
import plotly.express as px
import plotly.io as pio
pio.renderers.default = 'iframe' # or 'notebook' or 'colab' or 'jupyterlab'

classes = list(set(coarse_labels))
colors = px.colors.qualitative.Bold
color_discrete_map = dict(zip(classes, colors))

In [324]:
fig = px.scatter(
    embedding, x=0, y=1,
    color=target, labels={'color': 'class'}, color_discrete_map=color_discrete_map
)


fig.update_layout(legend=dict(
    yanchor="bottom",
    y=0.01,
    xanchor="left",
    x=0.01,
    font=dict(size=12,),
    itemsizing='trace'
))

fig.show()

#colors: https://plotly.com/python/discrete-color/

In [325]:
val_ids, val_embs = list(val_img_embedding_pairs.keys()), list(val_img_embedding_pairs.values())
val_embs = umap_model.transform(val_embs)
coarse_labels_val = []
for i in val_ids:
    coarse_labels_val.append(val[i]['scene-type-coarse'])

Sun Sep  5 22:21:44 2021 Worst tree score: 0.58164275
Sun Sep  5 22:21:44 2021 Mean tree score: 0.59373660
Sun Sep  5 22:21:44 2021 Best tree score: 0.60400792
Sun Sep  5 22:21:44 2021 Forward diversification reduced edges from 404200 to 140990
Sun Sep  5 22:21:44 2021 Reverse diversification reduced edges from 140990 to 140990
Sun Sep  5 22:21:44 2021 Degree pruning reduced edges from 167288 to 167250
Sun Sep  5 22:21:44 2021 Resorting data and graph based on tree order
Sun Sep  5 22:21:44 2021 Compressing index by removing unneeded attributes
Sun Sep  5 22:21:44 2021 Building and compiling search function
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs


In [326]:

fig = px.scatter(
    val_embs, x=0, y=1,
    color=coarse_labels_val, labels={'color': 'class'}, color_discrete_map=color_discrete_map
)


fig.update_layout(legend=dict(
    yanchor="bottom",
    y=0.01,
    xanchor="left",
    x=0.01,
    font=dict(size=12,),
    itemsizing='trace'
))

fig.show()

In [329]:
import plotly.graph_objects as go

animals=['giraffes', 'orangutans', 'monkeys']

fig = go.Figure(data=[
    go.Bar(name='SF Zoo', x=animals, y=[20, 14, 23], text=[20, 14, 23], textposition='auto'),
    go.Bar(name='LA Zoo', x=animals, y=[12, 18, 29], text=[12, 18, 29], textposition='auto')
])

fig.update_layout(barmode='stack')
fig.show()