# Imports and Setup

In [45]:
import requests
import pandas as pd
import configparser
import zeit

import plotly.express as px

In [4]:
# Load the Zeit API key from the local config file so it never appears in the notebook.
c = configparser.ConfigParser()
# ConfigParser.read() does not raise for a missing/unreadable file; it returns the
# list of files it actually parsed. Fail early with a clear message instead of a
# confusing NoSectionError on the lookup below.
if not c.read("config.ini"):
    raise FileNotFoundError("config.ini not found or unreadable; it must provide the 'key' option in section [Zeit]")
zeit_key = c.get("Zeit", "key")

# Create the API client, authenticate with the key, and check the service status.
api = zeit.API()
api.set_token(zeit_key)
# Last expression of the cell, so the status string is displayed as the cell output.
api.get_status()

'everything ok'

# Call the API

In [5]:
# Query the content endpoint and request a facet over the "author" field.
headers = {"X-Authorization": zeit_key}
params = {"facet_field": "author", "limit": 2}
url = "http://api.zeit.de/content"
# requests has no default timeout; without one the cell can hang indefinitely
# if the server never answers.
r = requests.get(url=url, headers=headers, params=params, timeout=30)
# Raise on 4xx/5xx responses here, rather than failing later with an obscure
# error when the body is parsed.
r.raise_for_status()

# Construct Series

In [6]:
# Decode the JSON response and pick out the author facet. The next cell consumes
# it as a flat list alternating author name and article count.
response_body = r.json()
l = response_body["facets"]["author"]

In [20]:
# Convert the flat facet list [author, count, author, count, ...] into a Series
# of article counts indexed by author name.
# Even positions hold the names and odd positions the counts; slicing pairs up
# every entry, including the final pair. Going through a dict keeps the
# "last value wins" behaviour should an author ever appear twice.
counts_by_author = dict(zip(l[0::2], l[1::2]))
if counts_by_author:
    # Built in one go instead of growing the Series element by element.
    long_series = pd.Series(counts_by_author)
else:
    # Empty facet: keep an explicitly typed empty Series so later cells still work.
    long_series = pd.Series(dtype="object")

In [70]:
# Keep only authors with more than 10 articles, ordered from most to fewest.
has_enough_articles = long_series > 10
series = long_series.loc[has_enough_articles].sort_values(ascending=False)
# Display the ten authors with the highest article counts.
series.head(10)

dpa                  3199
AFP                  1840
redaktion            1549
Oliver Fritsch       1425
Reuters              1327
Christoph Drösser    1322
Katharina Schuler    1225
Theo Sommer          1224
Tina Groll           1206
Kai Biermann         1192
dtype: int64

# Explore names

In [24]:
# Split every author name (the index of long_series) on whitespace and count
# how often each resulting token occurs across all authors.
row_numbers = pd.RangeIndex(start=0, stop=len(long_series))
names = (
    long_series.index
    .to_series(index=row_numbers)   # author names as values, 0..n-1 as index
    .str.split()                    # one list of tokens per author
    .explode(ignore_index=True)     # one row per token
    .value_counts()                 # token -> number of occurrences
)

In [27]:
# Display the first 200 entries of the token counts.
names.head(200)

Annette        49
V.             49
Jutta          49
Hofmann        49
Rene           49
-              49
Heike          48
Klaus-Peter    48
Lothar         48
Katja          48
dtype: int64

In [37]:
# Build the labelling template: the first 150 name tokens with their counts and
# a default gender of "m".
df = names[:150].to_frame(name = "count")
df["gender"] = "m"
# Create the CSV only if it does not exist yet (mode "x" = exclusive creation).
# The file read back later contains "f" labels that this cell never writes, so
# it is evidently edited after this export; re-running the notebook must not
# overwrite those edits.
try:
    with open("../data/names.csv", "x", encoding="utf-8", newline="") as csv_file:
        df.to_csv(csv_file)
except FileExistsError:
    print("../data/names.csv already exists; keeping it (delete the file to regenerate).")

#At this point I cleaned the data in ../data/names.csv manually, outside of this notebook.

In [66]:
# Read the names table back from disk. header=0 treats the file's first row as
# the header and replaces it with the explicit column names given here.
df = pd.read_csv(
    "../data/names.csv",
    header=0,
    names=["names", "count", "gender"],
)
df

Unnamed: 0,names,count,gender
0,Peter,625,m
1,Hans,564,m
2,Michael,534,m
3,Thomas,429,m
4,Wolfgang,416,m
...,...,...,...
100,Tobias,68,m
101,Marc,67,m
102,Nina,66,f
103,Stefanie,64,f


In [112]:
# Bar chart of the counts for the first 50 rows, coloured by gender; the name
# is shown when hovering over a bar.
first_fifty = df.head(50)
px.bar(first_fifty, y="count", color="gender", hover_name="names")

Unnamed: 0,index,count,gender
0,Peter,625,m
1,Hans,564,m
2,Michael,534,m
3,Thomas,429,m
4,Wolfgang,416,m
...,...,...,...
101,Tobias,68,m
102,Marc,67,m
103,Nina,66,f
104,Stefanie,64,f


# Explore top authors

In [83]:
# Display the 20 authors with the highest article counts.
series.head(20)

dpa                  3199
AFP                  1840
redaktion            1549
Oliver Fritsch       1425
Reuters              1327
Christoph Drösser    1322
Katharina Schuler    1225
Theo Sommer          1224
Tina Groll           1206
Kai Biermann         1192
Tilman Steffen       1174
Josef Joffe          1137
Michael Thumann      1089
Patrick Beuth        1048
Mark Schieritz       1027
Lisa Caspari         1000
Christian Spiller     997
Gunter Hofmann        932
Gero von Randow       908
Mark Spörrle          873
dtype: int64

In [113]:
# Search the API for "Barbara" among authors; the recorded output lists the
# matching author names together with their API URLs.
api.search_for("Barbara", "author")

 Search for 'Barbara': 142 results, limit: 10, matches : 
 
Barbara Beüys: http://api.zeit.de/author/Barbara-Beüys
Barbara Sichtermann: http://api.zeit.de/author/Barbara-Sichtermann
Barbara Ungeheuer: http://api.zeit.de/author/Barbara-Ungeheuer
Barbara von Jhering: http://api.zeit.de/author/Barbara-von-Jhering
Barbara Buerer: http://api.zeit.de/author/Barbara-Buerer
Barbara Lehmann: http://api.zeit.de/author/Barbara-Lehmann
Barbara v. Jhering: http://api.zeit.de/author/Barbara-v.-Jhering
Barbara Bondy: http://api.zeit.de/author/Barbara-Bondy
Barbara Ritzert: http://api.zeit.de/author/Barbara-Ritzert
Barbara Gaehtgens: http://api.zeit.de/author/Barbara-Gaehtgens

In [109]:
# Ids of the 20 authors with the most articles, hyphenated like the ids in the
# API's author URLs (e.g. .../author/Barbara-Lehmann).
# One entry per line with a trailing comma: adjacent string literals without a
# comma are silently concatenated by Python, which would merge two authors into
# one bogus id.
author_list = [
    "dpa",
    "AFP",
    "redaktion",
    "Oliver-Fritsch",
    "Reuters",
    "Christoph-Drösser",
    "Katharina-Schuler",
    "Theo-Sommer",
    "Tina-Groll",
    "Kai-Biermann",
    "Tilman-Steffen",
    "Josef-Joffe",
    "Michael-Thumann",
    "Patrick-Beuth",
    "Mark-Schieritz",
    "Lisa-Caspari",
    "Christian-Spiller",
    "Gunter-Hofmann",
    "Gero-von-Randow",
    "Mark-Spörrle",
]