In [None]:
#Copyright (c) Microsoft Corporation. All rights reserved.
#Licensed under the MIT License.

# -*- coding: utf-8 -*-

import json
import os 
from pprint import pprint
import requests

# Add your Bing Search V7 subscription key and endpoint to your environment variables.
subscription_key = os.environ['BING_SEARCH_V7_SUBSCRIPTION_KEY']
endpoint = os.environ['BING_SEARCH_V7_ENDPOINT'] + "/bing/v7.0/search"

# Query term(s) to search for. 
query = "Microsoft Cognitive Services"

# Construct a request
mkt = 'en-US'
params = { 'q': query, 'mkt': mkt }
headers = { 'Ocp-Apim-Subscription-Key': subscription_key }

# Call the API
try:
    response = requests.get(endpoint, headers=headers, params=params)
    response.raise_for_status()

    print("\nHeaders:\n")
    print(response.headers)

    print("\nJSON Response:\n")
    pprint(response.json())
except Exception as ex:
    raise ex

### 1.0 setup the network

In [37]:
import yfinance as yf
import requests
import pandas as pd
import numpy as np

from keys import *

pd.options.display.max_rows = None

### 2.0 get all the public company data through yfinance by using symbol

In [7]:
company_df = pd.read_csv('tickers.csv')

Unnamed: 0,symbol
0,DDD
1,MMM
2,WBAI
3,WUBA
4,EGHT
5,AHC
6,AOS
7,ATEN
8,AIR
9,AAN


In [43]:
sym_ls = [row['symbol'] for i,row in company_df.iterrows()]

In [81]:
def getFinancials(x,label):
    try:
        return tickers.tickers[x].Financials.loc[label][0]
    except KeyError:
        return np.nan

In [82]:
def getInfo(x,label):
    try:
        return tickers.tickers[x].info[label]
    except KeyError:
        return np.nan

In [64]:
tickers = yf.Tickers(sym_ls)

In [98]:
import time

i=0
start = time.time()
total = len(sym_ls)
info_list = []
for s,t in tickers.tickers.items():
    i += 1
    try:
        info_list.append([t.info['sector'],t.info['marketCap'],t.info['shortName'],t.financials.loc['Total Revenue'][0]])
    except KeyError:
        info_list.append([np.nan,np.nan,np.nan,np.nan])
    print('status:',i,'/',total,'time:',time.time()-start,end='\r')

status: 2762 / 2762 time: 19051.3995373249055

In [101]:
info_df = pd.DataFrame(info_list,columns=['sector','market cap','name','total revenue'])

In [102]:
df = company_df.join(info_df)

In [107]:
#df['sector'].value_counts()

df.to_csv('public_2700.csv')

In [None]:
import winsound
duration = 5000  # milliseconds
freq = 440  # Hz
winsound.Beep(freq, duration)

### 3.0 google search contact name and get their social media account (or the most relevant result)

In [42]:
from googlesearch import search

In [47]:
construction_df = pd.read_excel('LinkedIn_Sales_Contact_400.xlsx')

  construction_df = pd.read_excel('LinkedIn_Sales_Contact_400.xlsx')


In [55]:
construction_df.head()

Unnamed: 0,profileUrl,fullName,firstName,lastName,imgUrl,degree,title,companyName,companyUrl,regularCompanyUrl,location,note,dateAdded,vmid,linkedInProfileUrl,name,query,timestamp
0,https://www.linkedin.com/sales/people/ACwAAAAB...,Steve Bret Jensen,Steve Bret,Jensen,https://media-exp3.licdn.com/dms/image/C5603AQ...,2,Talent Acquisition Specialist,Arcadis,https://www.linkedin.com/sales/company/5840,https://www.linkedin.com/company/5840,"Taylor, Arizona, United States",,2021-06-15T00:10:12.543Z,ACwAAAABxNQBAlT98PPnwF1ztJsCzmDYWvsAPK8,https://www.linkedin.com/in/ACwAAAABxNQBAlT98P...,Steve Bret Jensen,https://www.linkedin.com/sales/lists/people/68...,2021-06-15T00:20:51.041Z
1,https://www.linkedin.com/sales/people/ACwAAA_1...,Tiffany Dinh,Tiffany,Dinh,https://media-exp3.licdn.com/dms/image/C5603AQ...,2,"Sr Manager, Human Resources",AECOM,https://www.linkedin.com/sales/company/15656,https://www.linkedin.com/company/15656,"Anaheim, California, United States",,2021-06-15T00:10:23.222Z,ACwAAA_10tgBsu9dn-rB7il4G1ecL5p0Y_F6GeE,https://www.linkedin.com/in/ACwAAA_10tgBsu9dn-...,Tiffany Dinh,https://www.linkedin.com/sales/lists/people/68...,2021-06-15T00:20:51.041Z
2,https://www.linkedin.com/sales/people/ACwAAARG...,Ali AlMalki,Ali,AlMalki,https://media-exp3.licdn.com/dms/image/C4D03AQ...,2,Senior Human Resources Manager,AECOM,https://www.linkedin.com/sales/company/15656,https://www.linkedin.com/company/15656,Saudi Arabia,,2021-06-15T00:10:23.222Z,ACwAAARGE5ABHsgnWZ6H6hOIExM8OLsCkMvFuco,https://www.linkedin.com/in/ACwAAARGE5ABHsgnWZ...,Ali AlMalki,https://www.linkedin.com/sales/lists/people/68...,2021-06-15T00:20:51.041Z
3,https://www.linkedin.com/sales/people/ACwAAALZ...,"Kalpana Varsani, MCIPD",Kalpana,"Varsani, MCIPD",https://media-exp3.licdn.com/dms/image/C4E03AQ...,2,Human Resources Manager,Jacobs,https://www.linkedin.com/sales/company/4025,https://www.linkedin.com/company/4025,"London, England, United Kingdom",,2021-06-15T00:10:12.543Z,ACwAAALZzJgBoNTvtEkLoxKxaSVIN14IQfvO360,https://www.linkedin.com/in/ACwAAALZzJgBoNTvtE...,"Kalpana Varsani, MCIPD",https://www.linkedin.com/sales/lists/people/68...,2021-06-15T00:20:51.041Z
4,https://www.linkedin.com/sales/people/ACwAAABe...,Lavanya Desai,Lavanya,Desai,,2,Manager - Talent Management & Graduate Recruit...,Worley,https://www.linkedin.com/sales/company/6677,https://www.linkedin.com/company/6677,"Thane, Maharashtra, India",,2021-06-15T00:10:12.467Z,ACwAAABerroBSgA1N1O-Xs7Gf0X84nRnzDtcayM,https://www.linkedin.com/in/ACwAAABerroBSgA1N1...,Lavanya Desai,https://www.linkedin.com/sales/lists/people/68...,2021-06-15T00:20:51.041Z


In [56]:
construction_df['twitter']=construction_df['fullName'].apply(lambda x: search(x+' Twitter',num_results=0)[0])

IndexError: list index out of range

In [58]:
for i,row in construction_df.iterrows():
    print(row['fullName'])

Steve Bret Jensen
Tiffany Dinh
Ali AlMalki
Kalpana Varsani, MCIPD
Lavanya Desai
Oliver Thomson
David Stevens
Valeria Pantoja RodrÃ­guez
Phillipa Mauger
Annette Lind FREC
Andrea Moore
Kate Geer
Alex Camacho PHR  SHRM-CP
Maciej SzczÄ™sny
Catherine Macdonald
Caitlan V.
Caroline Kim
Eveliina Tayfur
Kim Mock
Michael Truntzer
Sharon Kitts
Allen Lin
Christopher Wettengel, MBA
Daniel Perry
Doris Lim (sHRBP, IHRP-CP)
Silje Hilden Barningham  (she/her)
Paulette Bennett
Yahya AlZahrani
Alicia Cao
Anne-Karine Brodeur - CRHA
Brandon McLean
Venelina K.
Katarzyna Campbell
Ernest Ho
Puja Singh
Dana Viri Rosa
Lindsay Gosse, CPHR
Michael Mauro, CHRP
Megan Tucker
Tiffany Hypolite, SHRM-CP, CCM
Marica Castillo
Kayla Meinema
Susie Stuart
Sukhyanta Das (MBA, BA)
Ghadah Alajmi
Charu Sharma
Danah AlRabiah
Madison Seeker, PHR
Brogan Petersen
Jacqueline Cregan
Veronica Bryant
Payal Nagar
Natalie Kravchuk
Rebecca Ellison
Andrew Williams, PHR
Kim Rivera
Fitzroy Ellis
Lilian Vieira Lima
Jemqueen "JJ"
Robert Reda I

In [53]:
search('Misha Nikulin Twitter',num_results=0)[0]

['https://twitter.com/mishanikulin?lang=en']

In [5]:
searchQuery = '@mishanikulin'
retweet_filter = '-filter:retweets'

q=searchQuery+retweet_filter
tweetsPerQry = 100
file_name = 'tweets_out.txt'
sinceID = None

In [6]:
tweetSearch(q)

NameError: name 'tweetSearch' is not defined

In [22]:
def tweetSearch(q,fName='tweets_out.txt',sinceId = None):

    max_id = -1
    maxTweets = 1000

    tweetCount = 0
    print("Downloading max {0} tweets".format(maxTweets))
    with open(fName, 'w') as f:
        while tweetCount < maxTweets:
            tweets = []
            try:
                if (max_id <= 0):
                    if (not sinceId):
                        new_tweets = api.search(q=q, lang ="en", count=tweetsPerQry, tweet_mode='extended')

                    else:
                        new_tweets = api.search(q=q, lang ="en", count=tweetsPerQry,
                                            since_id=sinceId, tweet_mode='extended')
                else:
                    if (not sinceId):
                        new_tweets = api.search(q=q, lang ="en", count=tweetsPerQry,
                                            max_id=str(max_id - 1), tweet_mode='extended')
                    else:
                        new_tweets = api.search(q=q, lang ="en", count=tweetsPerQry,
                                            max_id=str(max_id - 1),
                                            since_id=sinceId, tweet_mode='extended')

                if not new_tweets:
                    print("No more tweets found")
                    break
                for tweet in new_tweets:
                    f.write(str(tweet.full_text.replace('\n','').encode("utf-8"))+"\n")

                tweetCount += len(new_tweets)
                print("Downloaded {0} tweets".format(tweetCount))
                max_id = new_tweets[-1].id

            except tweepy.TweepError as e:
                # Just exit if any error
                print("some error : " + str(e))
                break

    print ("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fName))
    return True

In [40]:
import tweepy
import pandas as pd

auth = tweepy.OAuthHandler(TWITTER_API['Consumer Key'],TWITTER_API['Consumer Secret Key'])
auth.set_access_token(TWITTER_API['Access Token'],TWITTER_API['Access Token Secret'])

api = tweepy.API(auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True)

tweet_df = tweet2df()

In [41]:
tweet_df

Unnamed: 0,name,text
0,Saatchi Gallery,🏖️ \n\n📷 ‘Beach’ painting by Australian artist...
1,University of Alberta,Akanksha Bhatnagar pursued her passion for pol...
2,Alberta Government,If you received your first #COVID19AB vaccine ...
3,عادل علي بن علي,لا تحزن إذا ارهقتك الهموم ، وضاقت بك الدنيا بم...
4,Ajans Haber,"Rus heyeti, Türkiye ile uçuşların açılmasına i..."
5,Canada,"This National #IndigenousHistoryMonth, take a ..."
6,Sukhbir Singh Badal,Held meetings with office bearers &amp; worker...
7,Canadian Franchise Association,Pillar To Post Home Inspectors is the largest ...
8,Ajans Haber,Özel tiyatro projeleri için destek başvuruları...
9,عادل علي بن علي,قال أحد الصالحين\nاقرأ ما شئت واحفظ ما شئت واك...


In [39]:
def tweet2df(user=None,tweetNo = 50):
    tweet_df = pd.DataFrame(columns=['name','text'])
    
    if user is None:
        tweets = api.home_timeline(count=tweetNo)
    else:
        tweets = api.user_timeline(user,count=tweetNo)
    
    for tweet in tweets:
        new_df = pd.DataFrame([[tweet.user.name,tweet.text]], columns=['name','text'])
        tweet_df = tweet_df.append(new_df,ignore_index=True)

    return tweet_df

In [73]:
tweet_df['logic']=tweet_df['text'].str.contains('team')
tweet_df[tweet_df['logic']]['text'].values[0]

'RT @NASAPrize: We are kicking off the final competition of the 3D-Printed Habitat Challenge. Two teams are competing head to head for guts,…'

In [38]:
import openai

openai.api_key = OPENAI_API_KEY
openaiWraper(sample_tweet,engine = 'davinci')

' POSITIVE'

In [35]:
def openaiWraper(text,application = 'sentiment classifier',engine = 'ada'):
    openai.api_key = OPENAI_API_KEY
    
    if application == 'sentiment classifier':
        text_prompt = f'This is a tweet sentiment classifier \n\n\nTweet: "{text}"\nSentiment:'
        max_token = 3
    else:
        text_prompt = text
        max_token = 60
        
    response = openai.Completion.create(
      engine=engine,
      prompt=text_prompt,
      temperature=0.3,
      max_tokens=max_token,
      top_p=1.0,
      frequency_penalty=0.5,
      presence_penalty=0.0,
      stop=["###"]
    )
    
    text_return = response['choices'][0]['text']
    
    return text_return

In [12]:
response['choices'][0]['text']

'Positive"\n\nThis is a tweet sentiment classifier \nTweet: "I\'m not a fan of the new Facebook layout. It\'s hard to find my friends\' updates."\nSentiment:"Negative"\n\nThis is a tweet sentiment classifier \nTweet: "I'

In [5]:
url = 'https://www.alphavantage.co/query?function=OVERVIEW&symbol=IBM&apikey=demo'

r = requests.get(url)
data = r.json()

print(data.keys())

dict_keys(['Symbol', 'AssetType', 'Name', 'Description', 'CIK', 'Exchange', 'Currency', 'Country', 'Sector', 'Industry', 'Address', 'FullTimeEmployees', 'FiscalYearEnd', 'LatestQuarter', 'MarketCapitalization', 'EBITDA', 'PERatio', 'PEGRatio', 'BookValue', 'DividendPerShare', 'DividendYield', 'EPS', 'RevenuePerShareTTM', 'ProfitMargin', 'OperatingMarginTTM', 'ReturnOnAssetsTTM', 'ReturnOnEquityTTM', 'RevenueTTM', 'GrossProfitTTM', 'DilutedEPSTTM', 'QuarterlyEarningsGrowthYOY', 'QuarterlyRevenueGrowthYOY', 'AnalystTargetPrice', 'TrailingPE', 'ForwardPE', 'PriceToSalesRatioTTM', 'PriceToBookRatio', 'EVToRevenue', 'EVToEBITDA', 'Beta', '52WeekHigh', '52WeekLow', '50DayMovingAverage', '200DayMovingAverage', 'SharesOutstanding', 'SharesFloat', 'SharesShort', 'SharesShortPriorMonth', 'ShortRatio', 'ShortPercentOutstanding', 'ShortPercentFloat', 'PercentInsiders', 'PercentInstitutions', 'ForwardAnnualDividendRate', 'ForwardAnnualDividendYield', 'PayoutRatio', 'DividendDate', 'ExDividendDate',

In [108]:
design_df = pd.read_excel("Cycle 2 Desktop Research Input Data (2).xlsx",sheet_name='DesignFirms')
contractor_df = pd.read_excel("Cycle 2 Desktop Research Input Data (2).xlsx",sheet_name='Contractor Firms')
PM_df = pd.read_excel("Cycle 2 Desktop Research Input Data (2).xlsx",sheet_name='Program Management Firms')

  design_df = pd.read_excel("Cycle 2 Desktop Research Input Data (2).xlsx",sheet_name='DesignFirms')
  contractor_df = pd.read_excel("Cycle 2 Desktop Research Input Data (2).xlsx",sheet_name='Contractor Firms')
  PM_df = pd.read_excel("Cycle 2 Desktop Research Input Data (2).xlsx",sheet_name='Program Management Firms')


In [98]:
def getElement(x,n=0):
    if type(x)==type([]) and n<len(x):
        return x[n]

In [109]:
design_df['name'] = design_df['FIRM'].str.split(',').apply(getElement)
design_df['city'] = design_df['FIRM'].str.split(',').apply(lambda x: getElement(x,1))
design_df['state']= design_df['FIRM'].str.split(',').apply(lambda x: getElement(x,2))

contractor_df['name'] = contractor_df['FIRM'].str.split(',').apply(getElement)
contractor_df['city'] = contractor_df['FIRM'].str.split(',').apply(lambda x: getElement(x,1))
contractor_df['state']= contractor_df['FIRM'].str.split(',').apply(lambda x: getElement(x,2))

PM_df['name'] = PM_df['FIRM'].str.split(',').apply(getElement)
PM_df['city'] = PM_df['FIRM'].str.split(',').apply(lambda x: getElement(x,1))
PM_df['state']= PM_df['FIRM'].str.split(',').apply(lambda x: getElement(x,2))

In [104]:
def getSearchLink(x):
    return search(x)[0]

In [110]:
design_df['Website']=design_df['name'].apply(getSearchLink)
contractor_df['Website']=contractor_df['name'].apply(getSearchLink)
PM_df['Website']=PM_df['name'].apply(getSearchLink)

In [42]:
from searchtweets import load_credentials

load_credentials(filename="./twitter_api.yaml",
                 yaml_key="twitter_api",
                 env_overwrite=False)

SyntaxError: invalid syntax (<ipython-input-42-ee053cc93206>, line 1)