In [1]:
from googleapiclient.discovery import build
import time
import pandas as pd
# from apiclient import build

SERVER = 'https://www.googleapis.com'

API_VERSION = 'v1beta'
DISCOVERY_URL_SUFFIX = '/discovery/v1/apis/trends/' + API_VERSION + '/rest'
DISCOVERY_URL = SERVER + DISCOVERY_URL_SUFFIX

developerKey="KLUCZ"

SERVICE = build('trends', 'v1beta',
                developerKey=developerKey,
                discoveryServiceUrl=DISCOVERY_URL)

def getGraph(service, terms, start_date, end_date, geo):
    """
    Returns a Graph of search volume per time points, normalized.
    For better insights, one could provide restrictions for time range,
    geographic region, etc.
    
    -----Parameters-----
        'service' - configured Google API Client,
        'terms' - up to 30 search terms to compare/explore,
        'start_date' - month and a year in the format YYYY-MM e.g. 2010-01,
        'end_date' - month and a year in the format YYYY-MM e.g. 2010-01,
        'geo' - refer to the ISO-3166-2 for the supported values.
    """
    return service.getGraph(terms = terms,
                            restrictions_geo = geo,
                            restrictions_startDate = start_date,
                            restrictions_endDate = end_date).execute()

def getGraphAverages(service, terms, start_date, end_date, geo):
    """
    Returns the averages of normalized search volume for the given terms.
    For better insights, one could provide restrictions for time range,
    geographic region, etc.
    
    -----Parameters-----
        'service' - configured Google API Client,
        'terms' - up to 30 search terms to compare/explore,
        'start_date' - month and a year in the format YYYY-MM e.g. 2010-01,
        'end_date' - month and a year in the format YYYY-MM e.g. 2010-01,
        'geo' - refer to the ISO-3166-2 for the supported values. 
    """
    return service.getGraphAverages(terms = terms,
                                    restrictions_geo = geo,
                                    restrictions_startDate = start_date,
                                    restrictions_endDate = end_date).execute()

def getRisingQueries(service, term, start_date, end_date, geo):
    """
    Get a list of rising queries that were searched along with the requested
    term, under the given restrictions.
    
    -----Parameters-----
        'service' - configured Google API Client,
        'term' - can be either entity (for example /m/0d2p9p) or query,
        'start_date' - month and a year in the format YYYY-MM e.g. 2010-01,
        'end_date' - month and a year in the format YYYY-MM e.g. 2010-01,
        'geo' - refer to the ISO-3166-2 for the supported values. 
    """
    return service.getRisingQueries(term = term,
                                    restrictions_geo = geo,
                                    restrictions_startDate = start_date,
                                    restrictions_endDate = end_date).execute()

def getRisingTopics(service, term, start_date, end_date, geo):
    """
    Get a list of rising topics that were searched along with the requested
    term, under the given restrictions.
    
    -----Parameters-----
        'service' - configured Google API Client,
        'term' - can be either entity (for example /m/0d2p9p) or query,
        'start_date' - month and a year in the format YYYY-MM e.g. 2010-01, 
        'end_date' - month and a year in the format YYYY-MM e.g. 2010-01,
        'geo' - refer to the ISO-3166-2 for the supported values. 
    """
    return service.getRisingTopics(term = term,
                                   restrictions_geo = geo,
                                   restrictions_startDate = start_date,
                                   restrictions_endDate = end_date).execute()

def getTimelinesForHealth(service, terms, start_date, end_date, geo):
    """
    Get a list of graph of search volumes per time within a set of restrictions.
    Data is sampled so the accuracy of the numbers cannot be guaranteed.
    
    -----Parameters-----
        'service' - configured Google API Client,
        'term' - can be either entity (for example /m/0d2p9p) or query
                 and include ORs using '+' sign,
        'start_date' - month and a year in the format YYYY-MM e.g. 2010-01,
        'end_date' - month and a year in the format YYYY-MM e.g. 2010-01,
        'geo' - refer to the ISO-3166-2 for the supported values. 
    """
    return service.getTimelinesForHealth(terms = terms,
                                         geoRestriction_region = geo,
                                         time_startDate = start_date,
                                         time_endDate = end_date).execute()

def getTopQueries(service, term, start_date, end_date, geo):
    """
    Get a list of top queries that were searched along with the requested
    term, under the given restrictions.
    
    -----Parameters-----
        'service' - configured Google API Client,
        'term' - can be either entity (for example /m/0d2p9p) or query,
        'start_date' - month and a year in the format YYYY-MM e.g. 2010-01,
        'end_date' - month and a year in the format YYYY-MM e.g. 2010-01,
        'geo' - refer to the ISO-3166-2 for the supported values. 
    """
    return service.getTopQueries(term = term,
                                 restrictions_geo = geo,
                                 restrictions_startDate = start_date,
                                 restrictions_endDate = end_date).execute()

def getTopTopics(service, term, start_date, end_date, geo):
    """
    Get a list of top topics that were searched along with the requested
    term, under the given restrictions.
    
    -----Parameters-----
        'service' - configured Google API Client,
        'term' - can be either entity (for example /m/0d2p9p) or query,
        'start_date' - month and a year in the format YYYY-MM e.g. 2010-01,
        'end_date' - month and a year in the format YYYY-MM e.g. 2010-01, 
        'geo' - refer to the ISO-3166-2 for the supported values.
    """
    return service.getTopTopics(term = term,
                                restrictions_geo = geo,
                                restrictions_startDate = start_date,
                                restrictions_endDate = end_date).execute()

In [45]:
'''
getGraph OK
getGraphAverages 400
getRisingQueries OK
getRisingTopics OK
getTimelinesForHealth 400
getTopQueries OK
getTopTopics OK
'''

terms = ['grypa', 'kaszel', 'choroba']
start_date = '2010-01-01'
end_date = '2020-02-01'
regions = ["SL","DS",'KP', 'LB', 'LU', 'LD', 'MA','MZ', 'OP', 'PK', 'PD','PM','SK', 'WN', 'WP', 'ZP']


df = pd.DataFrame([[0, 0, 0, 0]], columns=["Token", "Place", "Date", "Value"])

for k in range(len(regions)):
    geo = "PL-"+regions[k]
    print(regions[k])
    data = getTimelinesForHealth(SERVICE, terms, start_date, end_date, geo)
#     time.sleep(60)
    for j in range(len(data['lines'])):
        for i in range(len(data['lines'][0]["points"])):
            df_data = data['lines'][j]["points"][i]['date']
            df_place = regions[k]
            df_value = data['lines'][j]["points"][i]['value']
            df_token = data['lines'][j]['term']

            df_new = pd.DataFrame([[df_token, df_place, df_data, df_value]], columns=["Token", "Place", "Date", "Value"])
            df = df.append(df_new)

            name = "trends_regions.csv"
            df = df[1:]
            df.to_csv(name, mode='a', index=False, header=False)

SL
DS
KP
LB
LU
LD
MA
MZ
OP
PK
PD
PM
SK
WN
WP
ZP


In [60]:
df = pd.read_csv("trends_regions.csv", header=0)
df.columns=["Token","Place", "Date", "Value"]
df.to_csv("trends.csv", index=False)
df.head()

Unnamed: 0,Token,Place,Date,Value
0,grypa,SL,Jan 10 2010,699.486201
1,grypa,SL,Jan 17 2010,782.967092
2,grypa,SL,Jan 24 2010,689.975973
3,grypa,SL,Jan 31 2010,773.999028
4,grypa,SL,Feb 07 2010,600.234477


In [63]:
len(df.Place.unique()), len(regions), df.Place.unique()

(16, 16, array(['śląskie', 'dolnośląskie', 'kujawsko-pomorskie', 'lubelskie',
        'lubuskie', 'łódzkie', 'małopolskie', 'mazowieckie', 'opolskie',
        'podkarpackie', 'podlaskie', 'pomorskie', 'świętokrzyskie',
        'warmińsko-mazurskie', 'wielkopolskie', 'zachodniopomorskie'],
       dtype=object))

In [62]:
regions = ["SL","DS",'KP', 'LB', 'LU', 'LD', 'MA','MZ', 'OP', 'PK', 'PD','PM','SK', 'WN', 'WP', 'ZP']

df.Place[df.Place=="SL"]="śląskie"
df.Place[df.Place=="PD"]="podlaskie"
df.Place[df.Place=="DS"]="dolnośląskie"
df.Place[df.Place=="KP"]="kujawsko-pomorskie"
df.Place[df.Place=="LB"]="lubelskie"
df.Place[df.Place=="LU"]="lubuskie"
df.Place[df.Place=="LD"]="łódzkie"
df.Place[df.Place=="MA"]="małopolskie"
df.Place[df.Place=="MZ"]="mazowieckie"
df.Place[df.Place=="OP"]="opolskie"
df.Place[df.Place=="PK"]="podkarpackie"
df.Place[df.Place=="PM"]="pomorskie"
df.Place[df.Place=="SK"]="świętokrzyskie"
df.Place[df.Place=="WN"]="warmińsko-mazurskie"
df.Place[df.Place=="WP"]="wielkopolskie"
df.Place[df.Place=="ZP"]="zachodniopomorskie"
df.Place.unique()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://p

array(['śląskie', 'dolnośląskie', 'kujawsko-pomorskie', 'lubelskie',
       'lubuskie', 'łódzkie', 'małopolskie', 'mazowieckie', 'opolskie',
       'podkarpackie', 'podlaskie', 'pomorskie', 'świętokrzyskie',
       'warmińsko-mazurskie', 'wielkopolskie', 'zachodniopomorskie'],
      dtype=object)

In [65]:
df.to_csv("trends.csv", index=False)
df.head()

Unnamed: 0,Token,Place,Date,Value
0,grypa,śląskie,Jan 10 2010,699.486201
1,grypa,śląskie,Jan 17 2010,782.967092
2,grypa,śląskie,Jan 24 2010,689.975973
3,grypa,śląskie,Jan 31 2010,773.999028
4,grypa,śląskie,Feb 07 2010,600.234477


In [66]:
df.Date = pd.to_datetime(df.Date, format='%b %d %Y')
df.to_csv("trends.csv", mode='w', index=False, header=True)
df.head()

Unnamed: 0,Token,Place,Date,Value
0,grypa,śląskie,2010-01-10,699.486201
1,grypa,śląskie,2010-01-17,782.967092
2,grypa,śląskie,2010-01-24,689.975973
3,grypa,śląskie,2010-01-31,773.999028
4,grypa,śląskie,2010-02-07,600.234477
