In [15]:
import math
import os
import random
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dateutil import tz
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
%matplotlib inline

In [16]:
import psycopg2 as pg2
import psycopg2.extras as pgex

# Connection settings for the tweets database.  Each value can be overridden
# through an environment variable; the literals are kept only as
# backward-compatible defaults.
# NOTE(security): credentials should not live in a notebook -- rotate this
# password and drop the defaults once the environment variables are in place.
this_host = os.environ.get('TWEETS_DB_HOST', '34.211.59.66')
this_user = os.environ.get('TWEETS_DB_USER', 'postgres')
this_password = os.environ.get('TWEETS_DB_PASSWORD', 'postgres')

# Only tweets that carry hashtags are fetched.
sql_select = '''select created_at, location, tweet_content, cleaned_tweet, hashtags from tweets where hashtags is not null;'''

conn = pg2.connect(host = this_host,
                        user = this_user,
                        password = this_password)
try:
    # RealDictCursor returns each row as a dict keyed by column name, which
    # gives the DataFrame below its column labels.
    cur = conn.cursor(cursor_factory=pgex.RealDictCursor)
    cur.execute(sql_select)
    rows = cur.fetchall()
finally:
    # Release the connection even when the query raises.
    conn.close()

df = pd.DataFrame(rows)
# Keep the positional row number as an explicit 'index' column.
df = df.reset_index()

In [17]:
# Parse the raw `created_at` values once, then break each timestamp into
# separate calendar columns (year, month, day, hour).
df['created_datetime'] = pd.to_datetime(df['created_at'])
df['year'] = df['created_datetime'].map(lambda stamp: stamp.year)
df['month'] = df['created_datetime'].map(lambda stamp: stamp.month)
df['day'] = df['created_datetime'].map(lambda stamp: stamp.day)
# A 'dayofweek' column is deliberately not created (it was disabled in this cell).
df['hour'] = df['created_datetime'].map(lambda stamp: stamp.hour)

In [None]:
import pickle
# NOTE(review): this unpinned shell install re-runs on every execution of the
# cell; consider installing redis once in the environment instead.
!pip install redis
import redis
# Address of the Redis server (the same IP the Postgres cells in this notebook use).
redis_ip = '34.211.59.66'
# Client handle `r` is reused by the cells that store/load the fitted vectorizer.
r = redis.StrictRedis(redis_ip)
# Last expression of the cell: displays the keys currently in the store.
r.keys()

In [None]:
# Number of rows currently held in `df`.
len(df)

In [None]:
# Hashtag text of every tweet; this is the corpus the CountVectorizer is fitted on.
# NOTE(review): the name is a typo of "hashtags_series"; it is kept because the
# vectorizer cell refers to it by this spelling.
hastages_series = df['hashtags']

In [None]:
# Learn the hashtag vocabulary (English stop words removed) and cache the
# fitted vectorizer in Redis so it can be reloaded without re-fitting.
count_vectorizer = CountVectorizer(min_df = 1, stop_words='english')
# `fit` returns the vectorizer itself, so both names refer to the same object.
hashtags_countvec_fit = count_vectorizer.fit(hastages_series)
# The serialized bytes get their own name: `hashtags_countvec` is used
# elsewhere in the notebook for the *unpickled* vectorizer, and binding the
# same name to raw bytes here made the variable's type depend on which cell
# ran last.
hashtags_countvec_pickled = pickle.dumps(hashtags_countvec_fit)
r.set('hashtags_countvec_fit', hashtags_countvec_pickled)

In [None]:
# Reload the fitted vectorizer cached under 'hashtags_countvec_fit'.
# NOTE(security): pickle.loads runs arbitrary code embedded in the payload;
# this is only acceptable while the Redis instance is trusted and the value
# was written by this notebook.
_pickled_countvec = r.get('hashtags_countvec_fit')
if _pickled_countvec is None:
    # redis returns None for a missing key; fail with a clear message instead
    # of the opaque TypeError pickle.loads(None) would raise.
    raise KeyError("Redis key 'hashtags_countvec_fit' not found; "
                   "run the cell that fits and stores the vectorizer first")
hashtags_countvec = pickle.loads(_pickled_countvec)

In [None]:
# Vocabulary learned by the vectorizer, as a plain list of strings.
# `get_feature_names` was removed from newer scikit-learn releases in favour
# of `get_feature_names_out`; use whichever the installed version provides.
if hasattr(hashtags_countvec, 'get_feature_names_out'):
    hashtag_name = hashtags_countvec.get_feature_names_out().tolist()
else:
    hashtag_name = hashtags_countvec.get_feature_names()
# Spot-check a single vocabulary entry (raises IndexError when the
# vocabulary has 5000 or fewer terms).
hashtag_name[5000]

In [None]:
# Time span covered by the tweets.  Series.min()/max() are computed once
# each; the previous builtin min()/max() calls rescanned the column in Python
# four times and give order-dependent results when a NaT is present.
first_tweet_time = df['created_datetime'].min()
last_tweet_time = df['created_datetime'].max()
time_delta = last_tweet_time - first_tweet_time
# Whole hours covered (days * 24 + leftover hours; minutes are dropped).
time_window = time_delta.components.days*24 + time_delta.components.hours
time_lag = timedelta(hours = 6)   # width of each sliding window
time_gap = timedelta(hours = 2)   # step between consecutive window starts
# Number of window starts that fit in the covered span.
windows = int(round(time_delta/time_gap,0))
print('start time: ', first_tweet_time,'\n',
      'end time:  ', last_tweet_time,'\n',
      'total hours: ',time_window,'\n',
      'time lag: ', time_lag,'\n',
      'time gap: ', time_gap,'\n',
      'time windows: ', windows)

In [None]:
# Module-level settings read by hashtag_trend().
# Series.min()/max() replace the builtin min()/max(), which iterate the
# column in Python and are order-dependent when a NaT is present.
time_delta = df['created_datetime'].max() - df['created_datetime'].min()
# Whole hours covered (days * 24 + leftover hours; minutes are dropped).
time_window = time_delta.components.days*24 + time_delta.components.hours
time_lag = timedelta(hours = 2)   # width of each sliding window
time_gap = timedelta(hours = 1)   # step between consecutive window starts
windows = int(round(time_delta/time_gap,0))
# '%b' (abbreviated month name) replaces '%h', a C-library alias for the same
# field that is not among Python's documented directives and is rejected on
# some platforms.
time_format = '%Y-%b-%d %a %I %p'
# Spike times are labelled as UTC and then converted to `to_zone` for display.
from_zone = tz.tzutc()
#to_zone = tz.tzlocal()
to_zone = 'US/Pacific'
def hashtag_trend(hashtag):
    """Plot how often `hashtag` appears over time and mark where it starts trending.

    The tweets in the module-level `df` are cut into `windows` overlapping
    windows, each `time_lag` wide and starting `time_gap` after the previous
    one.  For every window the share of tweets whose 'hashtags' text matches
    `hashtag` is computed; that frequency curve and its gradient are plotted,
    and a dashed vertical line is drawn at the first window of every run in
    which the gradient exceeds 0.01.

    Parameters
    ----------
    hashtag : str
        Pattern to look for.  It is lower-cased and handed to
        ``Series.str.contains``, so it is treated as a regular expression
        (e.g. ``'paris|climate'`` matches either word).
        NOTE(review): matching is case-sensitive after lower-casing the
        pattern, which presumably relies on the stored hashtags being
        lower case -- confirm against the data.

    Returns
    -------
    None
        The function only draws on the current matplotlib state.

    Reads the globals `df`, `windows`, `time_lag`, `time_gap`, `time_format`,
    `from_zone` and `to_zone`.
    """
    hashtag = hashtag.lower()
    created = df['created_datetime']
    # Computed once; it anchors both the windows and the spike labels.
    first_time = created.min()

    frequencies = []
    for window in range(windows):
        start_time = first_time + window * time_gap
        end_time = start_time + time_lag
        # Half-open interval [start, end): with a strict '>' on the start the
        # earliest tweet could never fall into any window.
        in_window = (created >= start_time) & (created < end_time)
        # A window without tweets yields NaN (mean of an empty selection).
        frequencies.append(df.loc[in_window, 'hashtags'].str.contains(hashtag).mean())
    arr = np.array(frequencies, dtype=float)

    # np.gradient needs at least two samples; fall back to a flat slope.
    grad = np.gradient(arr) if arr.size >= 2 else np.zeros_like(arr)
    rising = np.flatnonzero(grad > .01).tolist()
    rising_set = set(rising)
    # Keep only the first index of each consecutive run of rising windows.
    spikes = [i for i in rising if (i - 1) not in rising_set]

    plt.figure(figsize=(14,7))
    plt.plot(arr, label='Hashtag Frequency')
    plt.plot(grad, label='Frequency Slope')

    for j in spikes:
        # Start time of window j, tagged as UTC and converted for the label.
        spiketime = first_time + time_gap * j
        spiketime = spiketime.replace(tzinfo=from_zone)
        spiketime_local = spiketime.astimezone(to_zone).strftime(time_format)

        plt.axvline(j, color = 'r',linestyle='dashed', label = 'Trending: {}'.format(spiketime_local))
    plt.title(hashtag, fontsize=20)
    plt.legend(fontsize=12)
    return
    

In [None]:
# The argument is used as a regex by hashtag_trend's str.contains call, so
# '|' matches either spelling.
hashtag_trend('nbafinals|nbafinal')

In [None]:
# Frequency/slope plot for the 'nationaldonutday' hashtag.
hashtag_trend('nationaldonutday')

In [None]:
# Counts tweets whose hashtags contain 'paris' or 'climate' (regex alternation).
hashtag_trend('paris|climate')

In [None]:
# Frequency/slope plot for the 'covfefe' hashtag.
hashtag_trend('covfefe')

In [None]:
# Mixed case is fine here: hashtag_trend lower-cases its argument before matching.
hashtag_trend('Warriors')

In [None]:
from IPython.display import display

# NOTE(review): `gis` is only created by the cell that follows this one
# (`gis = GIS()`), so on a fresh kernel this cell raises NameError -- run that
# cell first or move this one below it.
#basemaps are of type Web Map
basemaps = gis.content.search("tags:esri_basemap AND owner:esri", item_type = "web map")
# Render every search hit with its rich notebook representation.
for basemap in basemaps:
    display(basemap)

In [None]:
# NOTE(review): these unpinned shell installs re-run on every execution of the
# cell; consider installing the packages once in the environment instead.
!pip install --upgrade pip
!pip install arcgis
import arcgis
from arcgis.gis import GIS
# Create a GIS object, as an anonymous user for this example
gis = GIS()
# Create a map widget
map1 = gis.map('Paris') # Passing a place name to the constructor
                        # will initialize the extent of the map.
# Last expression of the cell: displays the map widget.
map1

In [None]:
# NOTE(review): the first query in this notebook does not select latitude or
# longitude; these columns only exist after the later query that does, so
# this cell depends on that cell having been run.
df[['latitude','longitude']]

In [None]:
!pip install gmaps
import gmaps
import gmaps.datasets
gmaps.configure(api_key="AIzaSyC_bskPEm1c45ahbgrEiPuCJmoLE1g_-pM") # Your Google API key


In [None]:
# Stack the two coordinate columns into one array: row 0 holds the
# latitudes, row 1 the longitudes.
n = np.array([df[column] for column in ('latitude', 'longitude')])
n.shape

In [None]:
# Bare attribute lookup whose value is displayed as the cell output.
# NOTE(review): looks like leftover exploration of the gmaps namespace -- confirm it is still needed.
gmaps.locations

In [None]:
# gmaps.datasets.load_dataset() looks up one of the library's bundled datasets
# by name, so passing it an array of rows cannot work.  The fetched rows are
# turned into a DataFrame directly instead.
# NOTE(review): assumes the intent was a frame of the queried tweets; `rows`
# must come from the query that selects latitude and longitude.
geo_df = pd.DataFrame(rows)
# Coordinate columns only.
tweet_geo_df = geo_df[['latitude','longitude']]

In [None]:
# heatmap_layer expects (latitude, longitude) pairs.  `n` is stacked as
# (2, N) -- latitudes in row 0, longitudes in row 1 -- so transpose it to
# (N, 2).  The previous gmaps.datasets.load_dataset(n) call was wrong:
# load_dataset looks up a bundled dataset by name.
locations = n.T

fig = gmaps.figure()
fig.add_layer(gmaps.heatmap_layer(locations))
# Last expression of the cell: displays the figure widget.
fig

In [None]:
import gmaps
import gmaps.datasets
# The key is read from GOOGLE_MAPS_API_KEY when set; the literal is kept only
# as a backward-compatible default.
# NOTE(security): an API key committed in a notebook should be rotated and
# the default removed.
gmaps.configure(api_key=os.environ.get("GOOGLE_MAPS_API_KEY", "AIzaSyC_bskPEm1c45ahbgrEiPuCJmoLE1g_-pM"))

# Bundled demo dataset, loaded by name.
locations = gmaps.datasets.load_dataset("starbucks_uk")

# NOTE(review): this cell builds the map with gmaps.Map() while the heatmap
# cell uses gmaps.figure() -- confirm which API the installed version expects.
fig = gmaps.Map()
starbucks_layer = gmaps.symbol_layer(
    locations, fill_color="green", stroke_color="green", scale=2)
fig.add_layer(starbucks_layer)
# Last expression of the cell: displays the map widget.
fig

In [21]:
import psycopg2 as pg2
import psycopg2.extras as pgex

# Connection settings for the tweets database.  Each value can be overridden
# through an environment variable; the literals are kept only as
# backward-compatible defaults.
# NOTE(security): credentials should not live in a notebook -- rotate this
# password and drop the defaults once the environment variables are in place.
this_host = os.environ.get('TWEETS_DB_HOST', '34.211.59.66')
this_user = os.environ.get('TWEETS_DB_USER', 'postgres')
this_password = os.environ.get('TWEETS_DB_PASSWORD', 'postgres')

# Tweets that carry hashtags and a non-zero latitude.  `hashtags` used to be
# listed twice in the SELECT; the duplicate collapsed into a single dict key
# and has been dropped.
sql_select = '''
    select created_at, location, cleaned_tweet, hashtags, latitude, longitude, bounding_box_coord
    from tweets
    where hashtags is not null and latitude != 0;
'''

conn = pg2.connect(host = this_host,
                        user = this_user,
                        password = this_password)
try:
    # RealDictCursor returns each row as a dict keyed by column name.
    cur = conn.cursor(cursor_factory=pgex.RealDictCursor)
    cur.execute(sql_select)
    rows = cur.fetchall()
finally:
    # Release the connection even when the query raises.
    conn.close()

# NOTE(review): this rebinds `df` and `rows`; the new frame has no
# 'created_datetime' column, so hashtag_trend() stops working until the first
# query and the datetime cell are re-run.
df = pd.DataFrame(rows)
# Keep the positional row number as an explicit 'index' column.
df = df.reset_index()

In [5]:
from bokeh.io import output_file, show, output_notebook
from bokeh.models import (GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool
)
from bokeh.layouts import row
from bokeh.plotting import figure

output_notebook()

# Road-map view at 30.29, -97.73 with zoom level 11.
map_options = GMapOptions(map_type="roadmap", zoom=11, lat=30.29, lng=-97.73)

# Data ranges are left unset so they are derived automatically.
# NOTE(review): no api_key is assigned to this plot, unlike the following
# cell -- confirm whether this draft cell is still needed.
plot = GMapPlot(map_options=map_options, x_range=DataRange1d(), y_range=DataRange1d())
show(plot, notebook_handle=True)

In [8]:
#!pip install bokeh
from bokeh.io import output_file, show, output_notebook
from bokeh.models import (GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool)
from bokeh.layouts import row
from bokeh.plotting import figure

output_notebook()
map_options = GMapOptions(lat=30.29, lng=-97.73, map_type="roadmap", zoom=11)

plot = GMapPlot(
    x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options
)
plot.title.text = "Austin"

# For GMaps to function, Google requires you obtain and enable an API key:
#
#     https://developers.google.com/maps/documentation/javascript/get-api-key
#
# Provide your personal key through the GOOGLE_MAPS_API_KEY environment
# variable; the literal below is kept only as a backward-compatible default.
# NOTE(security): a key committed in a notebook should be rotated and the
# default removed (a second key that was left here in a comment has been
# deleted for the same reason).
plot.api_key = os.environ.get("GOOGLE_MAPS_API_KEY", "AIzaSyB5ORq14HJ2CkL0-gPnQ06wwJySCH2lFi4")
# Three sample marker positions.
source = ColumnDataSource(
    data=dict(
        lat=[30.29, 30.20, 30.29],
        lon=[-97.70, -97.74, -97.78],
    )
)

# Longitude maps to x and latitude to y.
circle = Circle(x="lon", y="lat", size=15, fill_color="blue", fill_alpha=0.8, line_color=None)
plot.add_glyph(source, circle)

plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())
#output_file("gmap_plot.html")
show(plot, notebook_handle = True)

In [2]:
from bokeh.io import output_file, show, output_notebook
from bokeh.models import GeoJSONDataSource
from bokeh.plotting import figure
from bokeh.sampledata.sample_geojson import geojson
output_notebook()

# Wrap the bundled sample GeoJSON text in a data source for the glyph below.
geo_source = GeoJSONDataSource(geojson=geojson)

# Scatter the features using the source's 'x' and 'y' columns.
p = figure()
p.circle(source=geo_source, x='x', y='y', alpha=0.9)
# Rendered inline; writing to "geojson.html" via output_file is left disabled.
show(p, notebook_handle=True)

In [None]:
from bokeh.io import show, output_notebook, output_file
from bokeh.models import (
    GeoJSONDataSource,
    HoverTool,
    LinearColorMapper
)
from bokeh.plotting import figure
from bokeh.palettes import Viridis6

# Load the GeoJSON text from the working directory.  The previous line read
# `GeoJSONDataSource. (geojson=...)`, which is a syntax error; the stray
# ". " has been removed so the data source is actually constructed.
with open(r'argentina.geojson', 'r') as f:
    geo_source = GeoJSONDataSource(geojson=f.read())


# Colours are assigned from the 'objectid' field of each feature (see the
# fill_color mapping below).
color_mapper = LinearColorMapper(palette=Viridis6)

TOOLS = "pan,wheel_zoom,box_zoom,reset,hover,save"

p = figure(title="Argentina", tools=TOOLS, x_axis_location=None, y_axis_location=None, width=500, height=300)
p.grid.grid_line_color = None

p.patches('xs', 'ys', fill_alpha=0.7, fill_color={'field': 'objectid', 'transform': color_mapper},
          line_color='white', line_width=0.5, source=geo_source)


# The hover tool was created through the TOOLS string; fetch it to configure it.
hover = p.select_one(HoverTool)
hover.point_policy = "follow_mouse"
hover.tooltips = [("Provincia:", "@provincia")]

#output_file("PBIar.html", title="Testing islands in bokeh")

show(p)

In [6]:
from collections import OrderedDict

from bokeh.sampledata import us_counties, unemployment
from bokeh.plotting import figure, show, output_file, ColumnDataSource
from bokeh.models import HoverTool

# NOTE: us_counties / unemployment need the bokeh sample data; if importing
# them raises "bokeh sample data directory does not exist", run
# bokeh.sampledata.download() once.
state = "tx"

colors = ["#F1EEF6", "#D4B9DA", "#C994C7", "#DF65B0", "#DD1C77", "#980043"]

# Collect outline, colour, name and rate for every county of `state` in a
# single pass, so the five lists are aligned by construction (previously the
# county table was filtered three separate times).
county_xs = []
county_ys = []
county_colors = []
county_names = []
county_rates = []
for county_id, county in us_counties.data.items():
    if county['state'] != state:
        continue
    rate = unemployment.data[county_id]
    # One colour per 2 points of unemployment rate, capped at the last colour.
    idx = min(int(rate/2), 5)
    county_xs.append(county['lons'])
    county_ys.append(county['lats'])
    county_colors.append(colors[idx])
    county_names.append(county['name'])
    county_rates.append(rate)

source = ColumnDataSource(
    data = dict(
        x=county_xs,
        y=county_ys,
        color=county_colors,
        name=county_names,
        rate=county_rates,
    )
)


output_file("texas.html", title="texas.py example")

TOOLS="pan,wheel_zoom,box_zoom,reset,hover,save"

p = figure(title="Texas Unemployment 2009", tools=TOOLS)

p.patches('x', 'y',
    fill_color='color', fill_alpha=0.7,
    line_color="white", line_width=0.5,
    source=source)

# select_one returns the single HoverTool itself (the same call the Argentina
# cell uses); p.select(...) returns a collection, which is the wrong object
# to set tool attributes on.
hover = p.select_one(HoverTool)
hover.point_policy = "follow_mouse"
# The label previously read "Unemployment rate)" with a stray parenthesis.
hover.tooltips = OrderedDict([
    ("Name", "@name"),
    ("Unemployment rate", "@rate%"),
    ("(Long, Lat)", "($x, $y)"),
])

show(p)

RuntimeError: bokeh sample data directory does not exist, please execute bokeh.sampledata.download()

In [11]:
from bokeh.io import curdoc, show, output_notebook, output_file
from bokeh.models.glyphs import Circle
from bokeh.layouts import column
from bokeh.models import GMapPlot, Range1d, ColumnDataSource, PanTool, WheelZoomTool, BoxSelectTool, GMapOptions, Button, ResetTool
output_notebook()
# Empty ranges; they are passed to the GMapPlot as x_range / y_range below.
x_range = Range1d()
y_range = Range1d()

# JSON style string taken from: https://snazzymaps.com/style/1/pale-dawn
# The map is centred at 30.2861, -97.7394 with zoom level 13.
map_options = GMapOptions(lat=30.2861, lng=-97.7394, map_type="roadmap", zoom=13, styles="""
[{"featureType":"administrative","elementType":"all","stylers":[{"visibility":"on"},{"lightness":33}]},{"featureType":"landscape","elementType":"all","stylers":[{"color":"#f2e5d4"}]},{"featureType":"poi.park","elementType":"geometry","stylers":[{"color":"#c5dac6"}]},{"featureType":"poi.park","elementType":"labels","stylers":[{"visibility":"on"},{"lightness":20}]},{"featureType":"road","elementType":"all","stylers":[{"lightness":20}]},{"featureType":"road.highway","elementType":"geometry","stylers":[{"color":"#c5c6c6"}]},{"featureType":"road.arterial","elementType":"geometry","stylers":[{"color":"#e4d7c6"}]},{"featureType":"road.local","elementType":"geometry","stylers":[{"color":"#fbfaf7"}]},{"featureType":"water","elementType":"all","stylers":[{"visibility":"on"},{"color":"#acbcc9"}]}]
""")

# The key is read from GOOGLE_MAPS_API_KEY when set; the literal is kept only
# as a backward-compatible default.
# NOTE(security): an API key committed in a notebook should be rotated and
# the default removed.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "AIzaSyAM1OHVm6Yr_i54Kt01mylfxyNxQdxmxHQ")

plot = GMapPlot(
    x_range=x_range, y_range=y_range,
    map_options=map_options,
    api_key=API_KEY,
)
plot.title.text = "Austin"

# Three sample markers, each with its own fill colour.
source = ColumnDataSource(
    data=dict(
        lat=[30.2861, 30.2855, 30.2869],
        lon=[-97.7394, -97.7390, -97.7405],
        fill=['orange', 'blue', 'green']
    )
)

# Longitude maps to x and latitude to y; fill colour comes from the 'fill' column.
circle = Circle(x="lon", y="lat", size=15, fill_color="fill", line_color="black")
plot.add_glyph(source, circle)

pan = PanTool()
wheel_zoom = WheelZoomTool()
box_select = BoxSelectTool()
reset_tool = ResetTool()

plot.add_tools(pan, wheel_zoom, box_select, reset_tool)
# Show the map built in this cell.  The call used to pass `p`, a figure left
# over from an earlier cell, so the styled Google map was never displayed.
show(plot, notebook_handle = True)
def callback():
    """Restyle the global `plot`'s Google map and nudge its centre.

    Replaces `plot.map_options.styles` with the JSON style string below, then
    adds 0.01 to both the latitude and the longitude of the map centre.
    Takes no arguments and returns None.
    """
    restyled = """[ { "elementType": "labels", "stylers": [ { "visibility": "off" }, { "color": "#f49f53" } ] }, { "featureType": "landscape", "stylers": [ { "color": "#f9ddc5" }, { "lightness": -7 } ] }, { "featureType": "road", "stylers": [ { "color": "#813033" }, { "lightness": 43 } ] }, { "featureType": "poi.business", "stylers": [ { "color": "#645c20" }, { "lightness": 38 } ] }, { "featureType": "water", "stylers": [ { "color": "#1994bf" }, { "saturation": -69 }, { "gamma": 0.99 }, { "lightness": 43 } ] }, { "featureType": "road.local", "elementType": "geometry.fill", "stylers": [ { "color": "#f19f53" }, { "weight": 1.3 }, { "visibility": "on" }, { "lightness": 16 } ] }, { "featureType": "poi.business" }, { "featureType": "poi.park", "stylers": [ { "color": "#645c20" }, { "lightness": 39 } ] }, { "featureType": "poi.school", "stylers": [ { "color": "#a95521" }, { "lightness": 35 } ] }, {}, { "featureType": "poi.medical", "elementType": "geometry.fill", "stylers": [ { "color": "#813033" }, { "lightness": 38 }, { "visibility": "off" } ] }, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, { "elementType": "labels" }, { "featureType": "poi.sports_complex", "stylers": [ { "color": "#9e5916" }, { "lightness": 32 } ] }, {}, { "featureType": "poi.government", "stylers": [ { "color": "#9e5916" }, { "lightness": 46 } ] }, { "featureType": "transit.station", "stylers": [ { "visibility": "off" } ] }, { "featureType": "transit.line", "stylers": [ { "color": "#813033" }, { "lightness": 22 } ] }, { "featureType": "transit", "stylers": [ { "lightness": 38 } ] }, { "featureType": "road.local", "elementType": "geometry.stroke", "stylers": [ { "color": "#f19f53" }, { "lightness": -10 } ] }, {}, {}, {} ] """
    # Apply the new style first, then shift the centre (latitude, then longitude).
    plot.map_options.styles = restyled
    plot.map_options.lat = plot.map_options.lat + 0.01
    plot.map_options.lng = plot.map_options.lng + 0.01

# Button that triggers callback() (restyle the map and shift its centre).
button = Button(label="Foo")
button.on_click(callback)

# Attach button and map to the current document.
# NOTE(review): Python on_click callbacks presumably only fire when this runs
# under a Bokeh server, not through the notebook show() above -- confirm.
curdoc().add_root(column(button, plot))

In [30]:
#!pip install plotly
import plotly.plotly as py
from plotly.graph_objs import *

import pandas as pd

# The token is read from MAPBOX_ACCESS_TOKEN when set; the literal is kept
# only as a backward-compatible default.
# NOTE(security): access tokens should not be committed in a notebook.
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoiY2hlbHNlYXBsb3RseSIsImEiOiJjaXFqeXVzdDkwMHFrZnRtOGtlMGtwcGs4In0.SLidkdBMEap9POJGIe1eGw')

# Build the coordinate arrays here: they used to be defined only in a later
# cell, so on a fresh kernel this cell failed with NameError.
lat = np.array(df['latitude'])
lon = np.array(df['longitude'])

# One marker per tweet.
# NOTE(review): hoverinfo='text' is set but no `text` is supplied, and the
# recorded output warns that the point count exceeds what plotly can draw --
# consider sampling the coordinates.
data = Data([
    Scattermapbox(
        lat=lat,
        lon=lon,
        mode='markers',
        marker=Marker(
            size=18,
            color='rgb(155, 240, 225)',
            opacity=0.7
        ),

        hoverinfo='text'
    ),
   ]
)

# NOTE(review): the title and the filename below look like leftovers from the
# example this cell was adapted from -- confirm before publishing.
layout = Layout(
    title='Nuclear Waste Sites on Campus',
    autosize=True,
    hovermode='closest',
    showlegend=False,
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        center=dict(
            lat=38,
            lon=-94
        ),
        pitch=0,
        zoom=3,
        style='dark'
    ),
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='jupyter/Nuclear Waste Sites on American Campuses')


Woah there! Look at all those points! Due to browser limitations, the Plotly SVG drawing functions have a hard time graphing more than 500k data points for line charts, or 40k points for other types of charts. Here are some suggestions:
(1) Use the `plotly.graph_objs.Scattergl` trace object to generate a WebGl graph.
(2) Trying using the image API to return an image instead of a graph URL
(3) Use matplotlib
(4) See if you can create your visualization with fewer data points




Aw, snap! We don't have an account for ''. Want to try again? You can authenticate with your email address or username. Sign in is not case sensitive.

Don't have an account? plot.ly

Questions? support@plot.ly


PlotlyError: Because you didn't supply a 'file_id' in the call, we're assuming you're trying to snag a figure from a url. You supplied the url, '', we expected it to start with 'https://plot.ly'.
Run help on this function for more information.

In [26]:
# Copy each coordinate column of `df` into its own NumPy array.
lat, lon = (np.array(df[column]) for column in ('latitude', 'longitude'))

In [25]:
# `la[]` is a syntax error.
# NOTE(review): the recorded output (a single float near 34.05) suggests this
# cell inspected one latitude value; the first element is assumed -- confirm
# the intended index.
lat[0]

34.045360000000002

In [None]:
# Download the sites JSON and collect name and coordinates of every site
# whose rating equals 1.
# NOTE(review): this fetches from the network on every run and assumes the
# frame has 'locations' and 'site' columns shaped as indexed below.
json_data = pd.read_json('https://raw.githubusercontent.com/ottlngr/2016-15/ottlngr/ottlngr/sites.json')
# Parallel lists: entry i of each list describes the same site.
rating_one_site_lat = []
rating_one_site_lon = []
locations_name = []

for index in range(len(json_data)):
    # Skip entries without any location.
    if json_data['locations'][index] != []:
        if json_data['site'][index]['rating'] == 1:
            # Only the first listed location of the site is used.
            rating_one_site_lat.append(json_data['locations'][index][0]['latitude'])
            rating_one_site_lon.append(json_data['locations'][index][0]['longitude'])
            locations_name.append(json_data['site'][index]['name'])