In [151]:
from pymongo import MongoClient
import pandas as pd
import numpy as np
import pprint
import re
import requests as req

client = MongoClient('mongodb://localhost:27017/')
db = client.companies


#### First of all, let's query the "young" (valid) companies from our new database:

In [152]:
young=db.offices_ok.find({"is_young":1},{"_id":0,"index":0})
mydf=pd.DataFrame(young)

In [156]:
mydf=mydf[['name','category_code','description','tag_list','founded_year','number_of_employees','raised_money_dollars','lat','lng','geo_point','is_young','is_design','is_similar','is_succesful']]
mydf.head()

Unnamed: 0,name,category_code,description,tag_list,founded_year,number_of_employees,raised_money_dollars,lat,lng,geo_point,is_young,is_design,is_similar,is_succesful
0,Geni,web,Geneology social network site,"geni, geneology, social, family, genealogy",2006,18,16500000,34.090368,-118.393064,"{'type': 'Point', 'coordinates': [-118.393064,...",1,0,1,1
1,MeetMoi,social,Mobile Dating,"mobile, dating, location, realtime, phone",2007,15,5580000,40.757929,-73.985506,"{'type': 'Point', 'coordinates': [-73.985506, ...",1,0,1,1
2,Twitter,social,Real time communication platform,"text, messaging, social, community, twitter, t...",2006,1300,1160000000,37.776805,-122.416924,"{'type': 'Point', 'coordinates': [-122.4169244...",1,0,1,1
3,Facebook,social,Social network,"facebook, college, students, profiles, network...",2004,5299,2430000000,37.41605,-122.151801,"{'type': 'Point', 'coordinates': [-122.151801,...",1,0,1,1
4,Digg,news,user driven social content website,"community, social, news, bookmark, digg, techn...",2004,60,45000000,37.764726,-122.394523,"{'type': 'Point', 'coordinates': [-122.394523,...",1,1,1,1


#### Let's have a quick look at the location of our "desirable" companies:

In [157]:
import folium

In [159]:
m = folium.Map(zoom_start=15)

for i in range(len(mydf["lat"])):
    tooltip = mydf["name"][i]
    folium.Marker([mydf["lat"][i], mydf["lng"][i]], popup=mydf["name"][i], tooltip=tooltip).add_to(m)
m

#### Now we want to make a geo querye for each of our desired companies. We would like to know for each company:
#### 1. How many nearby design, similar and/or succesufl companies there are.
#### 2. How many nearby "old" companies there are.

In [160]:
def find_old(df1, radio_max_meters=2000):
    lista=[]
    for i in range(len(df1)): 
        old=db.offices_ok.find({'$and':[{
                        "geo_point": {
                            "$near": {
                                "$geometry": df1[i],
                                "$maxDistance": radio_max_meters,
                            }
                        }
                    },{
                        'is_young':0
                    }]})
        tempdf=pd.DataFrame(old)
        lista.append(tempdf.shape[0])
    return lista
       


In [161]:
def find_young(df1, radio_max_meters=2000):
    lista=[]
    for i in range(len(df1)): 
        old=db.offices_ok.find({'$and':[{
                        "geo_point": {
                            "$near": {
                                "$geometry": df1[i],
                                "$maxDistance": radio_max_meters,
                            }
                        }
                    },{
                        'is_young':1
                    }]})
        tempdf=pd.DataFrame(old)
        lista.append(tempdf.shape[0])
    return lista

In [162]:
def find_design(df1, radio_max_meters=2000):
    lista=[]
    for i in range(len(df1)): 
        old=db.offices_ok.find({'$and':[{
                        "geo_point": {
                            "$near": {
                                "$geometry": df1[i],
                                "$maxDistance": radio_max_meters,
                            }
                        }
                    },{
                        'is_design':1
                    }]})
        tempdf=pd.DataFrame(old)
        lista.append(tempdf.shape[0])
    return lista

In [163]:
def find_similar(df1, radio_max_meters=2000):
    lista=[]
    for i in range(len(df1)): 
        old=db.offices_ok.find({'$and':[{
                        "geo_point": {
                            "$near": {
                                "$geometry": df1[i],
                                "$maxDistance": radio_max_meters,
                            }
                        }
                    },{
                        'is_similar':1
                    }]})
        tempdf=pd.DataFrame(old)
        lista.append(tempdf.shape[0])
    return lista

In [164]:
def find_succesful(df1, radio_max_meters=2000):
    lista=[]
    for i in range(len(df1)): 
        old=db.offices_ok.find({'$and':[{
                        "geo_point": {
                            "$near": {
                                "$geometry": df1[i],
                                "$maxDistance": radio_max_meters,
                            }
                        }
                    },{
                        'is_succesful':1
                    }]})
        tempdf=pd.DataFrame(old)
        lista.append(tempdf.shape[0])
    return lista

In [197]:
prueba_df=mydf.copy()
prueba_df["old_near"]=find_old(prueba_df["geo_point"])
prueba_df["young_near"]=find_young(prueba_df["geo_point"])
prueba_df["design_near"]=find_design(prueba_df["geo_point"])
prueba_df["similar_near"]=find_similar(prueba_df["geo_point"])
prueba_df["succesful_near"]=find_succesful(prueba_df["geo_point"])

In [198]:
test=prueba_df[prueba_df["old_near"]==0]

In [199]:
test["Score"]=test["young_near"]+test["design_near"]+test["similar_near"]+test["succesful_near"]
test.shape

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


(1830, 20)

In [200]:
test=test.sort_values(by='Score', ascending=False)
test=test.reset_index()
test.head()

Unnamed: 0,index,name,category_code,description,tag_list,founded_year,number_of_employees,raised_money_dollars,lat,lng,...,is_young,is_design,is_similar,is_succesful,old_near,young_near,design_near,similar_near,succesful_near,Score
0,2192,Silver Tail Systems,analytics,Business Logic Abuse Fraud Protection,"business-logic-abuse, security, fraud, online-...",2008,90,22100000,37.428088,-122.143368,...,1,0,1,1,0,16,0,15,8,39
1,129,SayNow,mobile,Reinventing the phone call,,2005,20,7500000,37.42746,-122.143915,...,1,0,1,1,0,16,0,15,8,39
2,107,Doostang,web,Exclusive Career Community,"job-search, career, jobs, social-networking, e...",2005,10,5750000,37.427235,-122.145783,...,1,0,1,1,0,16,0,15,8,39
3,844,SocialVibe,advertising,Engagement Advertising,"social-media, social-network, media-platform, ...",2007,80,43900000,34.081524,-118.382674,...,1,0,1,1,0,16,2,14,2,34
4,634,Boxbe,web,Email Plugin,"email, anti-spam, social, screening, yahoo-mai...",2005,3,1500000,37.800209,-122.442592,...,1,0,1,1,0,15,1,12,3,31


In [209]:
m = folium.Map(zoom_start=50)

for i in range(len(test["lat"][:100])):
    tooltip = test["name"][i]
    loc=[test["lat"][i], test["lng"][i]]
    score=int(test["Score"][i])
    folium.Marker(loc, popup=tooltip, tooltip=tooltip).add_to(m)
    folium.Circle(location=loc, radius=(score)*10000, color='blue', fill=True, fill_color='blue').add_to(m)
m
#test["young_near"][i]+test["design_near"][i]+test["similar_near"][i]+test["succesful_near"][i]

4656