In [115]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import descartes
import geopandas as gpd
from shapely.geometry import Point, Polygon
import pyproj.crs
import geojsonio
import math
import rasterio

%matplotlib inline

In [116]:
data = pd.read_csv('./data/UKR_RUS_conflicts_2018-2022.csv')
df = data.copy()

# convert event_date to datetime
df['event_date'] = pd.to_datetime(df['event_date'])
# filter events by date
df = df[df['event_date'] <= '2021-01-01']
df = df[df['event_type'] != 'Protests']
df = df[df['event_type'] != 'Strategic developments']
df = df[df['latitude'] <= 55]
df = df[df['longitude'] <= 46]

df = df[['event_date', 'event_type', 'sub_event_type', 'actor1', 'assoc_actor_1', 'inter1', 'actor2', 'assoc_actor_2', 'inter2', 'interaction', 'admin1','location', 'source', 'source_scale', 'notes', 'fatalities']]
df = df.sort_values("event_date", axis=0, ascending=True)
df_notes = df['notes']
#display(np.array(df_notes))






In [117]:
df = df[df['location'].str.contains('Donetsk', na=False)]
display(df.location.value_counts())

Donetsk Filtration Station           940
Donetsk - Donetsk Airport            465
Butovka-Donetskaya Mine              256
Donetsk - Kirovskyi                  197
Donetskyi                            191
Donetsk                              103
Donetsk - Petrovskyi                  49
Donetsk - Central Railway Station     46
Donetsk - Kuibyshivskyi               31
Orikhove-Donetske                     27
Donetsk - Voroshilovsky               12
Donetsk - Leninskyi                    8
Donetsk - Kalininskyi                  5
Donetsk - Kyivskyi                     4
Donetsk - Proletarskyi                 2
Donetsk - Budonivskyi                  1
Name: location, dtype: int64

In [118]:
df = df[['event_date', 'event_type', 'sub_event_type', 'actor1', 'assoc_actor_1', 'inter1', 'actor2', 'assoc_actor_2', 'inter2', 'interaction', 'admin1','location', 'source', 'source_scale', 'notes', 'fatalities']]
df_notes = df['notes']
pd.options.display.max_colwidth = 700
#display(df_notes.sort_values('fatalities', ascending=False))



## // TODO:

> ### Next step is to analyze the descriptions in the ACLED data to see if we can better understand larger trends in the details of the conflict events. This can use some NLP or bag of words model in order to fulfill the CDA/ML requirements of this project.

> ### Then we can move on to the satellite image analysis. For this I intend to do some sort of image analysis like we did in the homeworks: i.e. direction the statue was facing, ***facial recognition (by taking a mean calculation of a face, and then finding the difference between the mean face and the newly encountered image)***, among others

> ### add UCDP data to other file with the maps. (Covers years 2015-2020)

#### main area of conflict: 
- Longitude: 37-40.1
- Latitude: 46.5-50

## NLP Resources:
- https://towardsdatascience.com/natural-language-processing-event-extraction-f20d634661d3
- https://machinelearningmastery.com/clean-text-machine-learning-python/
-- pretty sure this includes tokenization & removal of stopwords.  But not positive, read to be sure
- Gives meaning to independent words: https://spacy.io/models/en#en_core_web_lg

In [119]:
#display(df_notes.head())

import re

df['notes_list'] = df['notes'].apply(lambda x: re.split(r'\W+', x))
df['notes_list'] = df['notes_list'].apply(lambda x: [word.lower() for word in x])
display(df)

Unnamed: 0,event_date,event_type,sub_event_type,actor1,assoc_actor_1,inter1,actor2,assoc_actor_2,inter2,interaction,admin1,location,source,source_scale,notes,fatalities,notes_list
61377,2018-01-01,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Ukraine (2014-2019),,1,,,0,10,Donetsk,Donetsk Filtration Station,OSCE SMM-Ukraine,Other,"On 1 January 2018, the OSCE SMM observed a total of 5 explosions involving unknown weapons near Donetsk Filtration Station.",0,"[on, 1, january, 2018, the, osce, smm, observed, a, total, of, 5, explosions, involving, unknown, weapons, near, donetsk, filtration, station, ]"
61385,2018-01-01,Battles,Armed clash,Military Forces of Ukraine (2014-2019),,1,NAF: United Armed Forces of Novorossiya,,2,12,Donetsk,Donetsk,OSCE SMM-Ukraine; ITAR-TASS,Other-Regional,"On 1 January 2018, Military Forces of Ukraine fired at the outskirts of Donetsk (Volvo Centre). The OSCE SMM observed a total of 9 bursts of fire involving small arms near Donetsk.",0,"[on, 1, january, 2018, military, forces, of, ukraine, fired, at, the, outskirts, of, donetsk, volvo, centre, the, osce, smm, observed, a, total, of, 9, bursts, of, fire, involving, small, arms, near, donetsk, ]"
61386,2018-01-01,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Ukraine (2014-2019),,1,,,0,10,Donetsk,Donetsk - Central Railway Station,OSCE SMM-Ukraine,Other,"On 1 January 2018, the OSCE SMM observed a total of 2 explosions involving unknown weapons near Donetsk-Central Railway Station.",0,"[on, 1, january, 2018, the, osce, smm, observed, a, total, of, 2, explosions, involving, unknown, weapons, near, donetsk, central, railway, station, ]"
61373,2018-01-01,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Ukraine (2014-2019),,1,,,0,10,Donetsk,Butovka-Donetskaya Mine,OSCE SMM-Ukraine,Other,"On 1 January 2018, the OSCE SMM observed a total of 3 explosions involving unknown weapons [coded as shelling] near Butovka-Donetskaya Mine.",0,"[on, 1, january, 2018, the, osce, smm, observed, a, total, of, 3, explosions, involving, unknown, weapons, coded, as, shelling, near, butovka, donetskaya, mine, ]"
61358,2018-01-02,Battles,Armed clash,Military Forces of Ukraine (2014-2019),,1,NAF: United Armed Forces of Novorossiya,,2,12,Donetsk,Donetsk Filtration Station,OSCE SMM-Ukraine,Other,"On 2 January 2018, the OSCE SMM observed a total of 66 projectiles/explosions/bursts of fire involving unknown weapons in both directions of the frontline near Donetsk Filtration Station.",0,"[on, 2, january, 2018, the, osce, smm, observed, a, total, of, 66, projectiles, explosions, bursts, of, fire, involving, unknown, weapons, in, both, directions, of, the, frontline, near, donetsk, filtration, station, ]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14319,2020-12-25,Battles,Armed clash,Military Forces of Ukraine (2019-),,1,NAF: United Armed Forces of Novorossiya,,2,12,Donetsk,Donetsk Filtration Station,OSCE SMM-Ukraine,Other,"On 25 December 2020, the OSCE SMM observed a total of 3 projectiles/explosions/shots involving unknown weapons in both directions of the frontline near Donetsk Filtration Station.",0,"[on, 25, december, 2020, the, osce, smm, observed, a, total, of, 3, projectiles, explosions, shots, involving, unknown, weapons, in, both, directions, of, the, frontline, near, donetsk, filtration, station, ]"
14325,2020-12-25,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Ukraine (2019-),,1,,,0,10,Donetsk,Donetsk - Kirovskyi,OSCE SMM-Ukraine,Other,"On 25 December 2020, the OSCE SMM observed a total of 1 explosions involving unknown weapons near Donetsk-Kirovskyi.",0,"[on, 25, december, 2020, the, osce, smm, observed, a, total, of, 1, explosions, involving, unknown, weapons, near, donetsk, kirovskyi, ]"
14293,2020-12-27,Battles,Armed clash,NAF: United Armed Forces of Novorossiya,,2,Military Forces of Ukraine (2019-),,1,12,Donetsk,Donetsk Filtration Station,OSCE SMM-Ukraine,Other,"On 27 December 2020, the OSCE SMM observed a total of 7 projectiles/explosions involving unknown weapons in both directions of the frontline near Donetsk Filtration Station.",0,"[on, 27, december, 2020, the, osce, smm, observed, a, total, of, 7, projectiles, explosions, involving, unknown, weapons, in, both, directions, of, the, frontline, near, donetsk, filtration, station, ]"
14281,2020-12-28,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Ukraine (2019-),,1,,,0,10,Donetsk,Donetsk Filtration Station,OSCE SMM-Ukraine,Other,"On 28 December 2020, the OSCE SMM observed a total of 1 explosions involving unknown weapons near Donetsk Filtration Station.",0,"[on, 28, december, 2020, the, osce, smm, observed, a, total, of, 1, explosions, involving, unknown, weapons, near, donetsk, filtration, station, ]"


In [120]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

stop_words = stopwords.words('english')
df['notes_list'] = df['notes_list'].apply(lambda x: [word for word in x if word not in stop_words])

df['notes_list'] = df['notes_list'].apply(lambda x: [word for word in x if word.isalnum()])

In [121]:
#print(stop_words)

In [122]:
display(df)

Unnamed: 0,event_date,event_type,sub_event_type,actor1,assoc_actor_1,inter1,actor2,assoc_actor_2,inter2,interaction,admin1,location,source,source_scale,notes,fatalities,notes_list
61377,2018-01-01,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Ukraine (2014-2019),,1,,,0,10,Donetsk,Donetsk Filtration Station,OSCE SMM-Ukraine,Other,"On 1 January 2018, the OSCE SMM observed a total of 5 explosions involving unknown weapons near Donetsk Filtration Station.",0,"[1, january, 2018, osce, smm, observed, total, 5, explosions, involving, unknown, weapons, near, donetsk, filtration, station]"
61385,2018-01-01,Battles,Armed clash,Military Forces of Ukraine (2014-2019),,1,NAF: United Armed Forces of Novorossiya,,2,12,Donetsk,Donetsk,OSCE SMM-Ukraine; ITAR-TASS,Other-Regional,"On 1 January 2018, Military Forces of Ukraine fired at the outskirts of Donetsk (Volvo Centre). The OSCE SMM observed a total of 9 bursts of fire involving small arms near Donetsk.",0,"[1, january, 2018, military, forces, ukraine, fired, outskirts, donetsk, volvo, centre, osce, smm, observed, total, 9, bursts, fire, involving, small, arms, near, donetsk]"
61386,2018-01-01,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Ukraine (2014-2019),,1,,,0,10,Donetsk,Donetsk - Central Railway Station,OSCE SMM-Ukraine,Other,"On 1 January 2018, the OSCE SMM observed a total of 2 explosions involving unknown weapons near Donetsk-Central Railway Station.",0,"[1, january, 2018, osce, smm, observed, total, 2, explosions, involving, unknown, weapons, near, donetsk, central, railway, station]"
61373,2018-01-01,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Ukraine (2014-2019),,1,,,0,10,Donetsk,Butovka-Donetskaya Mine,OSCE SMM-Ukraine,Other,"On 1 January 2018, the OSCE SMM observed a total of 3 explosions involving unknown weapons [coded as shelling] near Butovka-Donetskaya Mine.",0,"[1, january, 2018, osce, smm, observed, total, 3, explosions, involving, unknown, weapons, coded, shelling, near, butovka, donetskaya, mine]"
61358,2018-01-02,Battles,Armed clash,Military Forces of Ukraine (2014-2019),,1,NAF: United Armed Forces of Novorossiya,,2,12,Donetsk,Donetsk Filtration Station,OSCE SMM-Ukraine,Other,"On 2 January 2018, the OSCE SMM observed a total of 66 projectiles/explosions/bursts of fire involving unknown weapons in both directions of the frontline near Donetsk Filtration Station.",0,"[2, january, 2018, osce, smm, observed, total, 66, projectiles, explosions, bursts, fire, involving, unknown, weapons, directions, frontline, near, donetsk, filtration, station]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14319,2020-12-25,Battles,Armed clash,Military Forces of Ukraine (2019-),,1,NAF: United Armed Forces of Novorossiya,,2,12,Donetsk,Donetsk Filtration Station,OSCE SMM-Ukraine,Other,"On 25 December 2020, the OSCE SMM observed a total of 3 projectiles/explosions/shots involving unknown weapons in both directions of the frontline near Donetsk Filtration Station.",0,"[25, december, 2020, osce, smm, observed, total, 3, projectiles, explosions, shots, involving, unknown, weapons, directions, frontline, near, donetsk, filtration, station]"
14325,2020-12-25,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Ukraine (2019-),,1,,,0,10,Donetsk,Donetsk - Kirovskyi,OSCE SMM-Ukraine,Other,"On 25 December 2020, the OSCE SMM observed a total of 1 explosions involving unknown weapons near Donetsk-Kirovskyi.",0,"[25, december, 2020, osce, smm, observed, total, 1, explosions, involving, unknown, weapons, near, donetsk, kirovskyi]"
14293,2020-12-27,Battles,Armed clash,NAF: United Armed Forces of Novorossiya,,2,Military Forces of Ukraine (2019-),,1,12,Donetsk,Donetsk Filtration Station,OSCE SMM-Ukraine,Other,"On 27 December 2020, the OSCE SMM observed a total of 7 projectiles/explosions involving unknown weapons in both directions of the frontline near Donetsk Filtration Station.",0,"[27, december, 2020, osce, smm, observed, total, 7, projectiles, explosions, involving, unknown, weapons, directions, frontline, near, donetsk, filtration, station]"
14281,2020-12-28,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Ukraine (2019-),,1,,,0,10,Donetsk,Donetsk Filtration Station,OSCE SMM-Ukraine,Other,"On 28 December 2020, the OSCE SMM observed a total of 1 explosions involving unknown weapons near Donetsk Filtration Station.",0,"[28, december, 2020, osce, smm, observed, total, 1, explosions, involving, unknown, weapons, near, donetsk, filtration, station]"


In [123]:
import spacy
nlp = spacy.load('en_core_web_lg')


print(df.shape)


# vector = nlp('ukraine')
# print(vector.vector, vector.vector.shape)


def process_tokens(token_list):
    word_vecs = {}
    words = []
    for token in token_list:
        word = nlp(token)
        words.append(word)
        word_vecs.update({word: word.vector})
    
    #print(words)
    
    return word_vecs
        
        
X = df['notes_list'].apply(lambda x: process_tokens(x))
    

(2337, 17)


In [124]:
print(type(X))
X = pd.DataFrame(list(X))
print(X.shape)
display(X)

<class 'pandas.core.series.Series'>
(2337, 48934)


Unnamed: 0,(1),(january),(2018),(osce),(smm),(observed),(total),(5),(explosions),(involving),...,(total).1,(2),(explosions).1,(involving).1,(unknown),(weapons),(near),(butovka),(donetskaya),(mine)
0,"[-0.30021, 0.14011, -0.027336, 0.20163, 0.025286, -0.38482, -0.37531, -0.25681, 0.35338, 0.47299, -0.37125, 0.34708, -0.065729, 0.22768, -0.32503, -0.011056, 0.14678, 1.8744, -0.45875, 0.03394, 0.18757, 0.13555, -0.13639, 0.24515, 0.25015, -0.54764, -0.39067, -0.0060458, 0.2595, 0.35855, -0.061274, 0.29665, 0.02185, 0.17137, 0.49745, -0.079493, -0.069096, 0.10334, -0.24165, 0.16294, 0.10463, -0.022197, 0.33776, 0.14384, -0.39587, -0.011679, -0.23188, 0.72962, 0.11218, -0.25527, 0.04528, -0.056999, 0.05123, -0.093141, -0.089, 0.25586, 0.093305, 0.61339, 0.62356, 0.09327, 0.075245, -0.064806, 0.15887, 0.11628, -0.36613, -0.41821, 0.38755, 0.096245, -0.31485, 0.053264, 0.19191, 0.468, -0.18...","[0.11004, 0.42661, 0.53735, 0.13191, 0.12797, -0.00040023, 0.33836, -0.13475, 0.43574, 0.52577, -0.85841, -0.098053, -0.092053, 0.086543, -0.2619, 0.2592, -0.12092, 0.70106, 0.33641, -0.016012, 0.31846, 0.0034601, 0.24132, -0.32453, 0.27164, -0.13262, -0.41411, -0.2751, -0.36304, -0.0069113, -0.39594, -0.38304, 0.25458, -0.088153, -0.50788, 0.37916, 0.16204, -0.053748, 0.053205, 0.11549, 0.27516, -0.1694, 0.35898, 0.14175, -0.093098, -0.23483, -0.023857, -0.51815, 0.1931, 0.17221, -0.1525, 0.059361, 0.14345, -0.45644, 0.18275, -0.14669, 0.059166, -0.09264, 0.17413, -0.24145, -0.43544, -0.3969, 0.39498, -0.24389, -0.012599, 0.19683, -0.25079, 0.16252, -0.50998, 0.11345, 0.083847, 0.046375...","[0.21465, 0.17506, 0.12661, -0.086832, -0.31555, -0.45489, 0.1489, 0.93856, 0.56094, 0.34017, -0.40597, 0.4602, 0.2834, 0.46201, -0.031229, -0.09808, 0.10155, 0.53027, 0.082473, -0.35899, 0.1588, 0.16847, 0.38663, 0.10434, -0.10187, -0.067069, -0.40466, -0.6971, 0.11897, 0.65792, -0.21904, -0.2866, 0.46387, 0.24424, 0.3803, -0.30297, 0.85843, 0.061429, 0.13571, 0.39687, -0.12915, 0.27294, 0.72363, 0.29028, -0.24254, -0.58153, 0.33329, -0.28181, -0.3923, 0.12461, -0.034931, 0.081259, -0.40182, -1.3406, 0.58337, -0.28232, 0.092178, 0.23941, -0.017066, -0.42224, -0.39321, -0.15738, -0.28056, 0.34485, -0.44739, 0.28993, -0.15701, -0.56852, 0.012417, -0.02546, -0.23417, -0.44284, 0.031621, 0....","[-0.35006, -1.1327, -0.52144, 0.20918, 0.34644, 0.046067, -0.1086, 0.63729, 0.16362, -1.36, 0.20053, -0.66104, 0.67382, 0.044442, -0.36856, 0.3106, 0.034168, -0.79126, 0.25331, 0.25758, 0.61386, 0.43193, 0.62901, 0.74872, 0.19591, -0.43872, 0.36457, -0.51009, -0.042331, 0.34226, -0.38691, -0.57897, 0.37279, 1.2359, -0.52868, 0.1455, 0.16699, -0.53065, -0.043354, 0.20392, 0.11096, -0.46282, -0.017903, 0.060396, 0.02505, 0.23391, 0.41275, 0.096138, -0.25386, 0.040824, 0.46451, 0.11261, 0.33, -0.63081, -0.5009, -0.79383, -0.017623, 0.42104, 0.23166, -0.14442, -0.27147, -0.34081, -0.11057, -0.61179, 0.0012438, 0.80367, 0.32563, -0.1417, 0.0006392, -0.73693, 0.40974, 0.1489, -0.040631, 0.5156...","[0.31427, 0.10107, -0.029461, 0.24118, -0.40128, -0.33741, 0.071447, -0.40452, 1.2049, -1.5912, -0.34565, -0.51661, 0.083092, 0.057283, 0.56833, 0.34623, 0.13443, -0.5211, -0.27264, -0.0042222, 0.028511, 0.79482, 0.044318, -0.37253, -0.26641, 0.22862, 0.31218, 0.62381, 0.21037, 0.68234, 0.43253, -0.35435, 0.54143, -0.12528, -0.48289, 0.081854, 0.44249, 0.64162, -0.025212, 0.22893, -0.27051, 0.12063, -0.3611, -0.04927, 0.090711, 0.18305, -0.32851, -0.093899, -0.27914, -0.76982, 0.028494, -0.2877, 0.12227, -0.018054, -0.61647, 0.38456, -0.0053701, 0.55127, 0.18506, 0.34201, -0.6954, 0.36641, -0.23859, -1.0729, 0.59354, 0.1451, 0.25235, -0.1571, -0.31517, -0.60207, -0.49106, -0.05625, 0.473...","[-0.29306, 0.052168, -0.5197, 0.032943, -0.49724, 0.38727, -0.016163, 0.50998, -0.052177, 2.4369, 0.5322, -0.039565, -0.070734, 0.076552, -0.12926, 0.29715, 0.23032, 1.1457, -0.33414, -0.19906, -0.048824, -0.46474, -0.25338, 0.039735, -0.15381, -0.10144, 0.022142, 0.094439, -0.23049, 0.16711, -0.313, -0.16012, -0.10853, 0.08251, -0.58979, 0.25936, -0.078222, -0.23468, -0.13283, -0.35532, -0.055838, -0.42957, 0.042217, 0.35229, 0.48023, -0.24814, -0.35492, -0.11329, 0.37768, -0.67443, 0.26433, -0.09247, -0.20659, -0.17703, 0.19371, -0.21878, 0.087154, -0.24579, -0.033658, -0.1245, 0.33248, 0.045688, 0.00030078, 0.25852, -0.38546, -0.14479, 0.2651, -0.01014, 0.15481, 0.202, 0.27208, 0.0562...","[-0.5761, 0.43095, 0.69146, 0.29216, -0.46169, -0.1253, -0.47765, -0.051078, 0.56301, 1.726, -0.054312, 0.3847, -0.21163, -0.10823, 0.045174, 0.178, -0.16524, 2.1399, -0.19995, 0.075172, -0.22522, 0.16287, -0.14428, 0.074662, 0.048475, -0.47295, -0.16337, 0.51307, -0.0066485, 0.63872, 0.27954, 0.067067, 0.50129, 0.064162, -0.3151, -0.37201, -0.76677, -0.1478, 0.1487, -0.11317, -0.35709, 0.55349, 0.53952, 0.11237, 0.31356, -0.53125, 0.25289, 1.1923, 0.22244, 0.19295, -0.021549, -0.38555, 0.83781, -0.75824, -0.024208, 0.38448, 0.16505, 0.13253, 0.23947, -0.24991, 0.24926, 0.35136, -0.5914, 0.30512, -0.23528, -0.053259, 0.48304, 0.19753, -0.47024, -0.39064, 0.62204, -0.0085589, 0.38941, -0....","[-0.50352, 0.40115, 0.00074387, -0.25584, -0.11155, -0.40644, -0.27502, -0.063868, 0.51318, 0.64975, -0.25983, 0.21782, -0.32361, 0.072475, -0.3266, -0.052709, -0.047091, 1.7412, -0.3076, -0.047853, -0.17702, 0.18175, -0.22485, 0.097834, -0.035203, -0.32414, -0.26044, -0.15988, 0.46085, 0.11654, 0.1829, 0.16528, 0.12672, 0.16832, 0.43282, -0.15417, 0.14652, 0.08555, -0.10295, 0.13137, 0.13274, -0.38016, 0.31656, 0.06645, 0.16702, 0.1931, -0.28226, 0.13729, -0.24983, -0.31101, 0.10632, -0.1129, 0.10807, -0.60411, -0.15283, 0.57284, -0.091565, 0.68318, 0.20532, 0.28497, 0.43628, -0.37214, 0.11751, 0.063318, -0.51093, -0.60985, 0.27285, 0.28481, -0.3123, 0.21364, 0.38334, 0.058893, 0.23231,...","[-0.62407, -0.35501, -0.39783, -0.20585, 0.031339, 0.11398, -0.39236, 0.54191, 0.11791, 0.97865, 0.40798, -0.55236, 0.24408, 0.29768, -0.64456, 0.0030468, 0.42704, 0.19641, -0.6356, -0.63649, 0.25703, -0.11913, 0.12688, -0.41116, -0.0051524, 0.2148, -0.28221, 0.58042, 0.30527, -0.26333, 0.2509, -0.41713, -0.86555, 0.71864, 0.021149, -0.09059, -0.11333, 1.0561, 0.34141, 0.2847, 0.23522, -0.55004, -0.039311, 0.48581, 0.12152, 0.44649, -0.63793, 0.25872, 0.092675, -0.25348, 0.76257, 0.36852, -0.23569, 0.58199, -0.62255, 0.15421, 0.18593, -0.56319, 0.32193, -0.15297, 0.18283, -0.14643, -0.13346, 0.3292, -0.18172, 0.095381, 0.62549, 0.11179, -0.55748, 0.8132, 0.18366, -0.68666, -0.20193, 0.75...","[-0.46455, -0.18668, -0.33983, -0.10933, -0.33811, -0.13939, 0.26904, 0.10281, 0.51451, 2.6984, -0.13853, -0.13749, 0.34576, 0.18462, -0.15366, -0.28537, 0.20458, 0.86634, -0.16466, -0.18971, 0.081543, 0.12116, -0.68767, 0.21, 0.49544, -0.30107, 0.28451, 0.1413, 0.24395, -0.020641, 0.23118, -0.015564, 0.16849, 0.044572, 0.21318, -0.03368, 0.032863, 0.094834, 0.23953, 0.11488, -0.10483, -0.087471, 0.23506, 0.11663, -0.45706, 0.50859, 0.0014651, 0.36644, 0.097281, -0.028328, 0.21986, 0.39304, 0.14297, 0.0067533, -0.31477, 0.25497, 0.23036, -0.31691, 0.055512, 0.25925, 0.37906, 0.43444, -0.11676, -0.060595, -0.16497, -0.35854, 0.17107, 0.14683, -0.62423, -0.15344, 0.50886, -0.37357, 0.13519...",...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2332,,,,,,,,,,,...,,,,,,,,,,
2333,,,,,,,,,,,...,,,,,,,,,,
2334,,,,,,,,,,,...,,,,,,,,,,
2335,,,,,,,,,,,...,,,,,,,,,,


In [125]:
print(type(X))
#display(type(df['event_vectors'].tolist()))

<class 'pandas.core.frame.DataFrame'>


In [128]:
from sklearn.cluster import DBSCAN


n_classes = {}

dbscan = DBSCAN(eps=eps, min_samples=2, metric='cosine').fit(X)

# for eps in np.arange(.0001, 5, .5):
#     dbscan = DBSCAN(eps=eps, min_samples=2, metric='cosine').fit(X)
#     n_classes.update({eps: len(pd.Series(dbscan.labels_).value_counts())})

ValueError: setting an array element with a sequence.