In [3]:
import pandas as pd
from src import utils
import requests
from src.apikeys import TWITTER, MONGO

from pymongo import MongoClient
client = MongoClient('mongodb://{}:{}@localhost:27017'.format(MONGO.USER,MONGO.PASSWORD))
# Access/Initiate Database
db = client['twitter_database']
# Access/Initiate Table
tab = db['broncos_raiders']

In [4]:
states = pd.read_csv('../data/states.csv')

states.head()

Unnamed: 0,State,Abbreviation
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ
3,Arkansas,AR
4,California,CA


In [7]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap as Basemap
from matplotlib.colors import rgb2hex
from matplotlib.patches import Polygon
# Lambert Conformal map of lower 48 states.
m = Basemap(llcrnrlon=-119,llcrnrlat=22,urcrnrlon=-64,urcrnrlat=49,
        projection='lcc',lat_1=33,lat_2=45,lon_0=-95)
# draw state boundaries.
# data from U.S Census Bureau
# http://www.census.gov/geo/www/cob/st2000.html
shp_info = m.readshapefile('st99_d00','states',drawbounds=True)
# population density by state from
# http://en.wikipedia.org/wiki/List_of_U.S._states_by_population_density
popdensity = {
'New Jersey':  438.00,
'Rhode Island':   387.35,
'Massachusetts':   312.68,
'Connecticut':    271.40,
'Maryland':   209.23,
'New York':    155.18,
'Delaware':    154.87,
'Florida':     114.43,
'Ohio':  107.05,
'Pennsylvania':  105.80,
'Illinois':    86.27,
'California':  83.85,
'Hawaii':  72.83,
'Virginia':    69.03,
'Michigan':    67.55,
'Indiana':    65.46,
'North Carolina':  63.80,
'Georgia':     54.59,
'Tennessee':   53.29,
'New Hampshire':   53.20,
'South Carolina':  51.45,
'Louisiana':   39.61,
'Kentucky':   39.28,
'Wisconsin':  38.13,
'Washington':  34.20,
'Alabama':     33.84,
'Missouri':    31.36,
'Texas':   30.75,
'West Virginia':   29.00,
'Vermont':     25.41,
'Minnesota':  23.86,
'Mississippi':   23.42,
'Iowa':  20.22,
'Arkansas':    19.82,
'Oklahoma':    19.40,
'Arizona':     17.43,
'Colorado':    16.01,
'Maine':  15.95,
'Oregon':  13.76,
'Kansas':  12.69,
'Utah':  10.50,
'Nebraska':    8.60,
'Nevada':  7.03,
'Idaho':   6.04,
'New Mexico':  5.79,
'South Dakota':  3.84,
'North Dakota':  3.59,
'Montana':     2.39,
'Wyoming':      1.96,
'Alaska':     0.42}
# choose a color for each state based on population density.
colors={}
statenames=[]
cmap = plt.cm.hot # use 'hot' colormap
vmin = 0; vmax = 450 # set range.
for shapedict in m.states_info:
    statename = shapedict['NAME']
    # skip DC and Puerto Rico.
    if statename not in ['District of Columbia','Puerto Rico']:
        pop = popdensity[statename]
        # calling colormap with value between 0 and 1 returns
        # rgba value.  Invert color range (hot colors are high
        # population), take sqrt root to spread out colors more.
        colors[statename] = cmap(1.-np.sqrt((pop-vmin)/(vmax-vmin)))[:3]
    statenames.append(statename)
# cycle through state names, color each one.
ax = plt.gca() # get current axes instance
for nshape,seg in enumerate(m.states):
    # skip DC and Puerto Rico.
    if statenames[nshape] not in ['Puerto Rico', 'District of Columbia']:
    # Offset Alaska and Hawaii to the lower-left corner. 
        if statenames[nshape] == 'Alaska':
        # Alaska is too big. Scale it down to 35% first, then transate it. 
            seg = list(map(lambda x,y: (0.35*x + 1100000, 0.35*y-1300000), seg))
        if statenames[nshape] == 'Hawaii':
            seg = list(map(lambda x,y: (x + 5100000, y-900000), seg))

        color = rgb2hex(colors[statenames[nshape]]) 
        poly = Polygon(seg,facecolor=color,edgecolor=color)
        ax.add_patch(poly)
plt.title('Filling State Polygons by Population Density')
plt.show()

ModuleNotFoundError: No module named 'mpl_toolkits.basemap'

In [9]:
total_locations = tab.count({'user.location':{'$ne':None}})
tweet_locations = tab.find({'user.location':{'$ne':None}},{'user.location':1})

In [10]:
states = set(locations.state)
cities = set(locations.city)
city_states = list(locations.apply(lambda row: [row['city'],row['state']],axis=1))

def contains_state(location):
    for state in states:
        if state in location:
            return True
    return False

def contains_city(location):
    for city in cities:
        if city in location:
            return True
    return False

def contains_city_state(location):
    for city,state in city_states:
        if city in location and state in location:
            return True
    return False


In [11]:
print(sum([contains_state(entry['user']['location']) for entry in 
                    tab.find({'user.location':{'$ne':None}},{'user.location':1})]))
print(sum([contains_city(entry['user']['location']) for entry in 
                    tab.find({'user.location':{'$ne':None}},{'user.location':1})]))
print(sum([contains_city_state(entry['user']['location']) for entry in 
                    tab.find({'user.location':{'$ne':None}},{'user.location':1})]))

20025
39417
5287


In [12]:
valid_city_tweets = [entry for entry in 
                    tab.find({'user.location':{'$ne':None}},{'user.location':1})
                    if contains_city(entry['user']['location']) ]
for x in valid_city_tweets:
    print(x['user']['location'])

Raleigh, NC
Dallas, TX
Arvada, CO
Broomfield, CO
Miami, FL
San Diego, CA
San Francisco, CA
Denver, CO
Northern Kingsport TN
Hamilton, NY
San Marcos, Texas
Tuscaloosa, AL
Denver, CO
Burlington, NC
Vancouver, Canada
Baltimore, MD
Denver, CO
Columbus, GA
Colorado Springs
Oakland raised. Vallejo based.
Fort Collins, Colorado
El Paso, TX
Charleston, SC
Greenville Nc
Sacramento
Houston, TX
San Antonio, TX
Yuma (AZ)
San Jose/ San Francisco
Littleton, CO
Ontario
Las Vegas, NV
Mankato, MN
Bellingham, WA
East Oakland, CA
Yuma (AZ)
Santa Monica, CA
Atlanta, GA
Albuquerque, NM
Las Vegas
Wilmington,De
New York
Memphis, TN
Los Angeles, CA 
Killeen, TX
Colorado Springs/Pueblo, CO
Denver, CO
Austin,TX ➡️ Revelstoke,BC
Rochester, NY
Seattle, WA
Long Beach, CA
Pittsburgh-ish, PA, USA
Oakland, CA
Minneapolis
Vallejo, CA
Duluth, MN
Vancouver, BC 🌎
St. Joseph, MO|KCMO|Touring|
 DIstrict of Columbia
West Bend, WI / Phoenix, AZ
Wausau, Wi
Denver, CO
Sunny San Diego 
Ontario
San Francisco, CA
New Haven/West H

Philadelphia, PA
Vancouver
Richmond, Ky
Los Angeles, CA
Dallas, Texas
Charleston, SC & DC
Chattanooga, TN
North Las Vegas, NV
Salinas, CA
Columbia, MO
Thousand Oaks, CA
Merced, CA
Miami, FL
Burbank, CA
North Las Vegas
Hayward, CA
Fresno, CA
Austin, TX
Columbus, OH
Atlanta
San Diego, CA
Denver, CO
Cambridge, MA
New York City
San Francisco, Bay Area
Atlanta, GA
Charlotte, NC
Salinas, CA
Los Angeles, California.
Pittsburgh, PA
Santa Ana, CA
Los Angeles, CA
Tampa, FL
OKC-LA-Chicago-NYC 
Fort Lauderdale, FL
Washington DC
Green Bay
Tampa Bay, Florida
Hagerstown, MD
Bend Oregon
Charlotte, NC
Parker, CO
Columbus, OH
Kent, OH
Northern Kingsport TN
Denver, CO
Los Angeles, CA
Dayton, OH
Atlanta, GA
Hollywood, CA
San Diego, CA
Corpus Christi, TX
Oakland, CA bay area
Scottsdale, AZ
Philadelphia, PA
Denver, CO
Los Angeles
Columbus ✈️ New York City
Denver, CO
Bullhead City, AZ
Fresno, CA
|ohio||UnitedStates of Lowell|
Billings, MT
Chino Hills, California
Houston, Texas
Cincinnati, Nashville, Seattle


San Francisco, CA
Tucson, AZ
Salt Lake City
Fort Worth, TX
Denver Tech Center
The Rocky Mountains
Denver, CO
Columbus Ohio #OhioMuscle
Fresno, CA
Phoenix, AZ
Newark, DE
Midwest via Memphis
El Paso, TX
Pacific Beach, San Diego
Denver, CO
Chicago, IL
New York, USA
Denver, Colorado
Norwalk, CA
Denver, CO
Springfield, MA
Charlotte, NC
B Town, Washington
Atlanta, GA
Houston, TX
Trenton,Nj ✈ Puerto Rico
Denver, CO
Boston
Phoenix, AZ
San Bernardino Strong
Philadelphia, PA
Berkeley 
Charlotte, NC
Buena Park, CA
Manhattan, NY
Whittier, CA
Fayetteville, NC
Tempe, AZ
Alameda, CA
Colorado Springs
Bristol | London UK
Livermore, CA
San Francisco, CA
Manteca, Ca
Glendale, AZ
Los Angeles, CA
Tacoma, WA
Brooklyn New York
St. Joseph, MO|KCMO|Touring|
Albuquerque, NM
Seattle, WA & Tucson, AZ.
Chicago, IL
San Antonio, TX
Somehow I'm in Toledo
Los Angeles, California.
Lancaster, PA
Fresno, Ca.
Pasadena  
New York, USA
Fort Carson, CO
Atlanta, GA
San Jose, CA
North Oakland, CA
Phoenix, AZ
#RootedInOakland 

West Hills, Los Angeles
Houston, TX
Denver, Colorado 
Knoxville, TN
Florence, KY
Denver, CO
West Hills, Los Angeles
Covina, CA
Murrieta, CA
Western Washington USA
Fort Lauderdale, FL
Summerlin, Las Vegas, NV
Atlanta, GA
Houston, TX
Murrieta, CA
Denver
Summerlin, Las Vegas, NV
Fort Collins, CO
Hamilton Scotland 
San Francisco ✈️ New Orleans 
Denver, CO
San Antonio, Texas
San Francisco 
New Orleans, LA
Berkeley Springs WV
Los Angeles, CA
Fresno, CA
Fargo, ND
Atlanta, GA
Miami, FL
Summerlin, Las Vegas, NV
Overland Park, KS
Denver, CO
Washington, DC
Denver CO
Chicago
CA Central Coast- Santa Maria
Summerlin, Las Vegas, NV
San Antonio, TX
Denver
Denver, CO
Los Angeles
Glendale AZ
Denver, CO
Treasure Island, San Francisco
Seattle, WA
Phoenix from Chicago
Los Angeles, CA
Tepic, Nayarit-Fontana, CA
District of Columbia, USA
New York, USA
Bakersfield, CA
Summerlin, Las Vegas, NV
Colorado Springs
Denver, Colorado 
Newport News VA
Denver, Colorado
Stockton, CA
Los Angeles, CA
Charlotte, NC
Boston


Las Vegas, NV
Washington, USA
Denver, CO
Colorado Springs, CO
DC/Charlottesville, Va
Orangevale, CA
Washington, DC
Reno, NV
Toronto, Ontario
Norfolk, VA
Clarksville, TN
Long Beach, CA
Los Angeles
Dallas, TX
West Hartford, CT
Cleveland, OH
Houston, TX
Denver
Denver, Colorado
Visalia, CA
Las Vegas, NV
Chicago ✈️ ATL ✈️ AZ
Vancouver, WA
Huntington Beach, California
San Francisco, CA
Denver, CO
Seattle
Hayward, CA
Omaha, NE
Pensacola, Fl
Las Vegas
New York, USA
Dallas, TX
Denver, CO
Vallejo, CA
Denver
Denver, CO
Covina, CA
Visalia, CA
Denver, CO
West Seattle, WA
Oakland, CA
Milwaukee
Iowa City, IA
Denver, Colorado 
Las Vegas, NV
New York, NY
Aventura, FL
Baltimore, MD
Los Angeles, CA
New York, NY
Denver/Jean-Claude's Damn Van
Phoenix, AZ
Washington State, USA
El Cajon, CA
Denver, CO
Reno, NV
Las Vegas, NV
Hayward, CA
Reno, NV
Stockton, CA 
Denver, CO
Washington, USA
Salinas, CA
Bakersfield, CA
New Orleans, LA
Pasadena, CA
District of Columbia, USA
Fairfield, CA 
Modesto
Covina, CA
Pueblo, 

Los Angeles, CA
Boulder, CO
New York, USA
North Oakland, CA
Los Angeles, CA
Boston
Atlanta, GA
Atlanta, Georgia, USA, Earth
Madera, CA
Seattle,  WA
Mile High, Denver CO
Denver slash Strasburg
Tulare, CA
Los Angeles,California
Denver, CO
Bakersfield, CA
Denver, CO
Cambridge, MA
Denver, Colorado
Denver, CO
Los Angeles, CA
St. Charles, MO. 
Oakland, CA
Denver, CO
Lincoln, NE
Madera, CA
Frayser, Bay Area Memphis
Bakersfield, CA
Kansas City, MO
Fort Collins, CO
Miami 🌴
Sacramento, CA
Los Angeles, CA
Oakland!
Spokane, WA
Washington, USA
North Atlanta,GA
San Rafael, CA
Tucson
Galveston, TX
South Richmond, Ca ✈️ ATL
Atlanta Ga
Los Angeles
Denver, CO
Columbus, GA
Chicago
Dallas, TX
Philadelphia, PA
Boston, MA
Concord, NC
Garden Grove, CA
Washington, USA
Denver
Denver, CO
Denver, CO
Oakland, CA
Kansas City, MO
Denver, CO
Buffalo, NY
Atlanta, GA
Los Angeles, CA
San Jose, CA
Albuquerque, NM
Oakland, CA
Pocatello, Idaho
Phoenix, AZ
Clovis CA
Reno, NV
Columbus
Louisville/Boston
Tacoma, WA
Tyler Hoec

Salt Lake City, UT
Eugene, Oregon
Fresno, CA ➡️ Monterey County 
Los Angeles, Ca.
Ontario, CA
Denver
Oakland, CA
Chicago
Kansas City
Temple Hills MD
Detroit, MI
Denver Colorado
Denver, CO
Denver, CO
Denver, CO
Charlotte
West Covina, CA
Memphis, TN
AWC | Yuma, AZ
Palm Springs, CA
San Diego, CA
San Antonio, TX
West Islip, NY Hempstead, NY
Farmington, NM
Kansas City
Buffalo
Oakland, CA 
Kansas City, MO
Flagstaff, AZ
San Diego, CA
Evansville, IN
San Antonio, TX
Fort Worth, Texas
Chicago ➡️ Dallas 
Fullerton, CA
Detroit ✈️ Atlanta ✈️ Houston
Eugene, OR
New Orleans, LA
Denver, CO
Trenton,New Jersey
Sacramento 
Los Angeles, CA
Denver, Colo.
Oakland, CA
Pittsburgh
La Canada Flintridge, CA
Denver
SLC, UT ✈ Denver, CO
Albuquerque, NM
Birmingham, England
Lancaster, NY
Providence, RI via 757 & 561
Fresno, CA
Salinas, CA
Indianapolis, IN
Los Angeles, CA
Tuscaloosa, AL
Buffalo
Chicago, IL
Santa Fe Springs, CA
Whittier, CA
Columbus, GA
San Ramon, CA
Kansas City, MO
Pittsburgh, PA
Overland Park, KS
La

Houston, TX
Richmond, VA
Las Vegas
Newark, NJ
Oakland, CA
Durham, NC
Richmond VA
San Diego, CA
Wilson, NC
Newark, CA
Kansas City, Missouri
Kansas City, MO
Westminster Colorado
San Jose, CA
Kansas City, MO
Durham, NC
Tulsa, OK
Santa Maria, CA
Hendersonville, TN
Los Angeles, CA
Northern Kingsport TN
Porterville, CA
Lake Charles, La
Edinburg, TX
Kansas City, MO
Sacramento, CA
San Antonio, TX
Chicago, IL
New York, USA
Toledo Made ✈️Florida Living🏝
Baltimore, MD
Denver, CO
Virginia Beach 757
Denver, CO
Los Angeles, CA
Cincinnati 
Seattle
Columbus, GA
Pueblo, CO
Los Angeles, CA
Denver, Colorado
Bell Gardens/Moreno Valley, CA
Colorado Springs, CO
Buffalo, NY
Denver, CO
Houston, TX
New York, USA
Los Angeles
Hammond IN
Shakopee, MN
Denver, CO
Albuquerque, NM
Denver, Colorado
Tucson, AZ
Body in Chicago ❤️ in Denver
Denver, CO
Detroit, MI
Ontario, Canada
Houston, TX
Northern Kingsport TN
La Crosse, WI
Bristol, CT
Memphis
Los Angeles, CA
San Francisco Bay Area
San Francisco, CA
Oxnard, CA
Recife -

Blue Springs, MO
Reno, NV
Richmond, VA
Fresno, CA
Rocklin, CA
Phoenix, AZ
Los Angeles, Ca
Richmond, CA
Jupiter
Memphis, TN
Sacramento, CA
Oakland, CA
Irving, TX
Evansville, IN
San Francisco, CA
Newport Beach, CA
Georgetown, SC
Dallas, TX
Davenport, IA
Santa Monica, California
Denver CO
Chicago, IL | KZOO
Buffalo NY 
Cincinnati
Providence, RI
Newark Nj
Denver, CO
Clarksville, IN
Pocatello, Idaho
Salinas, CA
San Leandro, CA
Denver, CO
Sacramento, CA
Denver
Oakland, CA
Denver, CO
Royal Oak, MI
Whittier, CA
Lincoln, MO
Ann Arbor, MI
Los Angeles, Ca
New Orleans to DAŁLA$
Warner Robins, GA
Toronto, Ontario
St. Louis
Oakland, CA
San Diego, CA
Davis, CA
New York City
Pittsburgh, PA
Berkeley, CA
New York, USA
Brampton, Ontario,Canada
Alexandria, VA
Las Vegas, NV
Sacramento, CA
San Francisco, CA
Orlando, FL
San Jose, CA
Miami, Florida
Fresno, CA
Billings, MT
Baltimore, MD
South Denver
Denver, CO
Colorado Springs, Co
Las Vegas, USA
New York, USA
New Orleans 
Chicago Ridge, IL
Washington, D.C.
Den

Trenton, OH
Honolulu, HI
San Antonio, TX
Vancouver, BC
Charlotte, NC
Sacramento, CA
Chicago, IL
Denver Colorado 
El Paso, TX
Kentucky
San Marcos, TX
Sun Valley, Los Angeles
Rancho Cucamonga, CA
Orlando, FL
Scranton, PA
Windsor, Ontario
Garland, TX
Oakland, CA
Denver, Co
Boulder, CO
Houston, TX
Denver, CO
Elysian Park, Los Angeles
Denver, CO
Los Angeles, CA Oahu Hawaii
Lorain/Cleveland
Denver, CO
Austin Texas 
Tampa, FL
Dallas, TX
Albuquerque, NM
Chatsworth, Los Angeles
New Orleans, LA
Long Beach, CA
Tepic, Nayarit-Fontana, CA
Santa Maria, CA
East Oakland CA
Chicago, IL
Roseville, CA
Oracle Arena/Oakland Coliseum
San Diego, CA
Chicago, IL
Ontario, Canada
Antioch, CA
Austin, TX
Charleston, SC
Virginia Beach, VA
Colorado Springs, CO
Detroit, MI
Denver Colorado 
Denver, CO
Vancouver
UC Berkeley
Denver, CO
Denver, CO
Gardena, CA
Aurora, CO
HTX -- San Marcos, TX
The Stars...  Houston,TX  area
Columbus, Ohio
Seattle, NY, SF
Ontario, Ca
Carlsbad, California
Tucson
San Jacinto, CA 
Wyoming, USA

Detroit Lakes, MN
Oakland, CA
San Diego, CA
Portage Lakes, Ohio
Federal Way, WA
New York City
Charlotte, NC
Long Beach | Boston | ATL
Miami, Florida
Oakland, CA
Fresno, California
San Jose, CA
Tucson, AZ
Denver, CO
Reno
Los Angeles
Chicago 
Denver, CO
Kansas City, MO
Los Angeles, CA
Allentown, PA
College Station, TX
El Paso, TX
Loveland, CO
San Francisco Bay Area 
Oakland, CA
Aurora, CO
Memphis, TN
Denver, CO
New York, NY
Oakland, CA 
Santa Monica, Jalisco
Oakland CA
Long Beach, Ca
Los Angeles, California
Hampton, VA
Independence, MO
East Oakland, California 
Atlanta, GA
Dallas, TX
Jacksonville FL
Springfield, IL USA
Green Bay, WI
Oakland, CA
Dover '12 HarrisburgU '16
Centennial, CO
Denver
Farmington, NM
Chesapeake, VA
Ottawa, Ontario
San Jose, CA
Atlanta/Nashville . 
#Seattle
Los Angeles
Seattle, WA
Whittier, CA
Walnut Creek, CA
Niagara Falls, NY
Wilmington, NC
Warren, OH
Tucson, AZ
#Seattle
Canton, MI
Davenport, IA
Spokane, WA
Los Angeles, CA
Troy, MI
Austin, Texas
Buffalo,Ny
Chicago

Las Vegas, Denver
Oakland, CA
Seattle, WA
Denver, CO
Anaheim, CA
Lindenwood Park, St. Louis, MO
Charlotte, NC
Blacksburg, Virgina
Tacoma, WA
New York City
Houston, TX
Raleighwood/NCAT19
Los Angeles, CA
Atlanta, Ga
Vallejo, CA
Columbia, Tennessee
Denver Colorado
#Seattle
Seattle, WA
Denver, CO
Seattle. NYC. Denver. 
San Francisco, Hellafornia
Albuquerque, NM
Castle Rock, CO
New York CIty
Las Vegas
Oakland, CA
Kansas City, MO
Honolulu, HI
San Jose, California
Dallas, TX
Reno, NV
Denver, CO
San Leandro - Hayward, CA
Midland, NC 
Buffalo, NY
Bakersfield, CA
Denver
Fresno, CA
Fort Worth, Texas 
Indianapolis 
Boulder, CO
Denton, TX
Dallas resident...LA Born 
Bowie MD
Alameda, CA
New York
Highlands Ranch, CO
Toledo Made ✈️Florida Living🏝
Reno, NV
New York, NY
Oakland Ca
San Antonio, TX
Bellflower, CA
Denver Colorado
New York
Chicago, IL
Dallas, TX
Atlanta
San Diego
Virginia Beach, VA
Fresno CA
Oakland to San Francisco 
Jersey City
Whittier, CA
Miami, FL
Fresno, California
Lansing, MI
Fresno, 

Chicago, Illinois
Denver, CO
Baltimore to NY 
Las Vegas, NV
Queens,New York
Las Vegas --- Spearfish
Philadelphia, PA
Plymouth, MI
Parker, CO
St. Louis, MO
West Palm Beach, FL
Charlotte, NC
Sacramento, CA
Santa Rosa/Harlingen
Baton Rouge, LA
Denver
Redlands East Valley 
Denver Co
New York
Lafayette, LA
Los Angeles
Beautiful City Of Denver, CO
Colorado Springs, CO
Kansas City, MO
Philadelphia, PA
Centennial, CO
Elkhart, United States
Anchorage, AK
New York
Akron, OH
Grand Island, NY
Denver, CO
Arlington, VA
Las Cruces
East Dublin, Ga 
Vancouver, British Columbia
Chicago
The OTHER Vancouver--WA State
Chicago
New York, USA
Dallas, TX
Sparks, NV
Las Vegas, NV
Springfield,MA
Westerville, OH
Waco, TX
Chicago, IL
Centennial, CO
San Antonio, Texas
Denver, CO ➡️LA 
Austin, TX
Mesa, AZ
Las Vegas, Nevada
Montgomery, AL
San Diego, CA
San Diego
New Hampshire and Boston
Miami, FL
Oakland, CA
Fall River, MA
Lenexa, Kansas
Orange Nation
Ontario, CA
Charlotte, NC
Cleveland, OH
Napa, CA
Houston, TX
Chica

In [18]:
locations = pd.read_csv('../data/Top5000Population.csv',encoding='iso8859',names=['city','state','pop'],
               index_col=False)
locations.head()

Unnamed: 0,city,state,pop
0,New York,NY,8363710
1,Los Angeles,CA,3833995
2,Chicago,IL,2853114
3,Houston,TX,2242193
4,Phoenix,AZ,1567924


In [22]:
import re
states = set(locations.state)
cities = set(locations.city)
city_states = list(locations.apply(lambda row: [row['city'],row['state']],axis=1))

def contains_state(location):
    for state in states:
        if state in re.split(r'\W+',location):
            return True
    return False

def contains_city(location):
    for city in cities:
        if city.lower() in location.lower():
            return True
    return False

print(sum([contains_state(entry['user']['location']) for entry in 
                    tab.find({'user.location':{'$ne':None}},{'user.location':1})]))
print(sum([contains_city(entry['user']['location']) for entry in 
                    tab.find({'user.location':{'$ne':None}},{'user.location':1})]))


37034
19570
