In [1]:
import pandas as pd
from sklearn.cluster import DBSCAN

In [2]:
# Read the raw tweet export into a DataFrame.
# NOTE(review): the file name looks like a Windows 8.3 short name — confirm it
# resolves on other platforms.
tweets = pd.read_csv(filepath_or_buffer='data/SE3B2D~1.csv')

In [None]:
# Show every column name of the tweets table as a plain Python list
tweets.columns.tolist()

In [None]:
# Find the smallest and largest values in the 'createdAt' column.
# NOTE(review): if the column is still plain text, these are lexicographic
# extremes rather than chronological ones — confirm the dtype.
created_at = tweets['createdAt']
min_date, max_date = created_at.min(), created_at.max()
print('Min date: ', min_date)
print('Max date: ', max_date)

In [5]:
# Collect [longitude, latitude] pairs for clustering.
# Rows missing EITHER coordinate are dropped: DBSCAN rejects NaN input, and
# checking longitude alone would let a row with a longitude but no latitude
# through and make the fit fail.
locations = tweets[['longitude', 'latitude']].dropna().values.tolist()

# Cluster locations with DBSCAN.
# eps is expressed in the same units as the coordinates (presumably decimal
# degrees — confirm), because the default metric is Euclidean.
db = DBSCAN(eps=0.1, min_samples=10).fit(locations)

# One cluster label per entry in `locations`; DBSCAN uses -1 for noise points.
labels = db.labels_

In [None]:
# Load a csv and convert it to a shapefile
from pathlib import Path

import geopandas as gpd
from shapely.geometry import Point

# Load csv file
df = pd.read_csv('data/SE3B2D~1.csv')

# Build one point per row (x = longitude, y = latitude)
geometry = [Point(xy) for xy in zip(df.longitude, df.latitude)]

# Use the authority string: the {'init': 'epsg:4326'} dict form is deprecated
# and triggers a FutureWarning in pyproj / geopandas.
crs = 'EPSG:4326'
gdf = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)

# Save shapefile; create the output directory first so the write does not
# fail when 'data/tweets' does not exist yet.
Path('data/tweets').mkdir(parents=True, exist_ok=True)
gdf.to_file('data/tweets/SE3B2D~1.shp')

In [13]:
# Remove rows where longitude OR latitude is null.
# Both coordinates are used to place markers, and folium rejects NaN
# locations, so filtering on longitude alone is not enough.
gdf = gdf.dropna(subset=['longitude', 'latitude'])

In [14]:
# Display tweets on a map
import folium
from folium.plugins import MarkerCluster

# Base map centred on latitude 48.8566, longitude 2.3522
m = folium.Map(location=[48.8566, 2.3522], zoom_start=12)

# Cluster layer that groups nearby markers; attached to the map
marker_cluster = MarkerCluster()
marker_cluster.add_to(m)

# One marker per row, with the tweet text as its popup
for lat, lon, text in zip(gdf['latitude'], gdf['longitude'], gdf['text']):
    marker = folium.Marker(location=[lat, lon], popup=text)
    marker.add_to(marker_cluster)