In [16]:
import requests
import math
import pandas as pd
import os

# While Loop

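### wrong code

This snippet does not page through the API: it requests `swapi_people_df['next']` on every pass instead of `next_page`, it does not define `next_page` before the first check, and `reset_index()` without `drop=True` inserts the old index as an extra column each time.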
```python
while not (next_page == None):
    response = requests.get(swapi_people_df['next'])
    data = response.json()

    next_page = data['next']

    df = pd.concat([df, pd.DataFrame(data['results'])]).reset_index()
```

### alternate version
```python
url = 'https://swapi.dev/api/people/'
people = []
while url:
    response = requests.get(url)
    data = response.json()
    people.extend(data['results'])
    url = data['next']

df = pd.DataFrame(people)
```

# Exercises

Create a new local git repository and a remote repository on GitHub, both named time-series-exercises. Save your work for this module in the time-series-exercises repo.

The end result of this exercise should be a file named acquire.py.

### 1. Using the code from the lesson as a guide and the REST API from https://swapi.dev/ as we did in the lesson, create a dataframe named people that has all of the data for people.

In [36]:
# Star Wars API root directory setup.
# The root endpoint is used below as a table of contents: it is looked up by
# resource name (e.g. "people") to obtain that resource's first-page URL.
root_url = 'https://swapi.dev/api/'

# Fetch the root listing and fail fast on an HTTP error status, so a bad
# response is reported here rather than as a JSON or key error further down.
root_response = requests.get(root_url)
root_response.raise_for_status()

# Decode the JSON body into a Python object.
root_response_data = root_response.json()

In [3]:
# Download every page of the Star Wars API "people" resource.
# Rows are collected in a plain list and the DataFrame is built once at the
# end; calling pd.concat inside the loop re-copies all earlier rows per page.
people_records = []
people_next_page = root_response_data["people"]

# Each page reports the following page's URL under 'next'; None ends the loop.
while people_next_page is not None:
    people_response = requests.get(people_next_page)
    # Surface HTTP errors here instead of as a KeyError on 'results' below.
    people_response.raise_for_status()
    people_data = people_response.json()
    people_records.extend(people_data['results'])
    people_next_page = people_data['next']

people_df = pd.DataFrame(people_records)

people_df.shape

(82, 16)

### 2. Do the same thing, but for planets.

In [4]:
# Download every page of the Star Wars API "planets" resource.
# Rows are collected in a plain list and the DataFrame is built once at the
# end; calling pd.concat inside the loop re-copies all earlier rows per page.
planets_records = []
# The starting URL must be bound to the same name the loop condition tests,
# otherwise the cell only runs when that name is left over from earlier work.
planets_next_page = root_response_data["planets"]

# Each page reports the following page's URL under 'next'; None ends the loop.
while planets_next_page is not None:
    planets_response = requests.get(planets_next_page)
    # Surface HTTP errors here instead of as a KeyError on 'results' below.
    planets_response.raise_for_status()
    planets_data = planets_response.json()
    planets_records.extend(planets_data['results'])
    planets_next_page = planets_data['next']

planets_df = pd.DataFrame(planets_records)

planets_df.shape

(60, 14)

### 3. Extract the data for starships.

In [14]:
# Download every page of the Star Wars API "starships" resource.
# Rows are collected in a plain list and the DataFrame is built once at the
# end; calling pd.concat inside the loop re-copies all earlier rows per page.
starships_records = []
starships_next_page = root_response_data["starships"]

# Each page reports the following page's URL under 'next'; None ends the loop.
while starships_next_page is not None:
    starships_response = requests.get(starships_next_page)
    # Surface HTTP errors here instead of as a KeyError on 'results' below.
    starships_response.raise_for_status()
    starships_data = starships_response.json()
    starships_records.extend(starships_data['results'])
    starships_next_page = starships_data['next']

starships_df = pd.DataFrame(starships_records)

starships_df.shape

(36, 18)

### 4. Save the data in your files to local csv files so that it will be faster to access in the future.

`df.to_csv(filename, index=False)`

In [18]:
filename = 'people.csv'

# Write the CSV only when it is not on disk yet; an existing file is kept.
if os.path.exists(filename):
    print(f"File {filename} already exists.")
else:
    people_df.to_csv(filename, index=False)
    print(f"Saving {filename}...")

File people.csv already exists.


In [19]:
filename = 'planets.csv'

# Write the CSV only when it is not on disk yet; an existing file is kept.
if os.path.exists(filename):
    print(f"File {filename} already exists.")
else:
    planets_df.to_csv(filename, index=False)
    print(f"Saving {filename}...")

Saving planets.csv...


In [20]:
filename = 'starships.csv'

# Write the CSV only when it is not on disk yet; an existing file is kept.
if os.path.exists(filename):
    print(f"File {filename} already exists.")
else:
    starships_df.to_csv(filename, index=False)
    print(f"Saving {filename}...")

Saving starships.csv...


### 5. Combine the data from your three separate dataframes into one large dataframe.

In [26]:
# Label every row with the resource it came from so the rows can still be
# told apart once the three frames are combined.
for frame, type_label in (
    (people_df, "People"),
    (planets_df, "Planet"),
    (starships_df, "Starship"),
):
    frame["Type"] = type_label

In [27]:
# Stack the three frames into one. ignore_index=True gives the result a fresh
# 0..n-1 index; without it each source frame keeps its own labels, so the same
# label would appear once per source frame.
giant_df = pd.concat([people_df, planets_df, starships_df], ignore_index=True)

In [31]:
# Number of rows contributed by each resource type.
giant_df.loc[:, "Type"].value_counts()

People      82
Planet      60
Starship    36
Name: Type, dtype: int64

### 6. Acquire the Open Power Systems Data for Germany, which has been rapidly expanding its renewable energy production in recent years. The data set includes country-wide totals of electricity consumption, wind power production, and solar power production for 2006-2017. You can get the data here: https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv

`pd.read_csv(url)`

In [34]:
# Raw CSV of the daily Open Power Systems Data for Germany, hosted on GitHub.
url = "https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv"

# pandas downloads and parses the file straight from the URL.
energy_df = pd.read_csv(filepath_or_buffer=url)

In [35]:
filename = 'opsd_germany_daily.csv'

# Write the CSV only when it is not on disk yet; an existing file is kept.
if os.path.exists(filename):
    print(f"File {filename} already exists.")
else:
    energy_df.to_csv(filename, index=False)
    print(f"Saving {filename}...")

Saving opsd_germany_daily.csv...


### 7. Make sure all the work that you have done above is reproducible. That is, you should put the code above into separate functions in the acquire.py file and be able to re-run the functions and get the same data.

```python
get_people()
get_planets()
get_starships()
get_all_starwars()
get_energy()
```

In [50]:
def get_star_wars_api_url(info):
    """Return the entry the Star Wars API root listing holds for ``info``.

    Parameters
    ----------
    info : str
        Key to look up in the root listing, e.g. "people", "planets" or
        "starships".

    Returns
    -------
    The value stored under ``info`` in the decoded root response. The getter
    functions in this file pass it to ``requests.get`` as a first-page URL.

    Raises
    ------
    requests.HTTPError
        If the root endpoint answers with an error status.
    KeyError
        If ``info`` is not present in the decoded root listing.
    """
    # root url (table of contents)
    root_url = 'https://swapi.dev/api/'

    root_response = requests.get(root_url)
    # Fail on an error status here rather than on a confusing JSON or key
    # error when the body is decoded and indexed below.
    root_response.raise_for_status()

    return root_response.json()[info]

In [49]:
def get_people(save=True):
    """Download every page of the Star Wars API "people" resource.

    Parameters
    ----------
    save : bool, default True
        When true, write the result to ``people.csv`` in the working
        directory unless a file with that name already exists.

    Returns
    -------
    pandas.DataFrame
        One row per entry of each page's 'results' list, indexed 0..n-1.

    Raises
    ------
    requests.HTTPError
        If a page answers with an error status.

    Notes
    -----
    The data is always downloaded; an existing ``people.csv`` is neither read
    nor overwritten.
    """
    # Collect rows in a list and build the frame once; pd.concat inside the
    # loop would re-copy every earlier row on each page.
    people_records = []
    people_next_page = get_star_wars_api_url("people")

    # Each page reports the following page's URL under 'next'; None ends it.
    while people_next_page is not None:
        people_response = requests.get(people_next_page)
        # Surface HTTP errors here instead of as a KeyError on 'results'.
        people_response.raise_for_status()
        people_data = people_response.json()
        people_records.extend(people_data['results'])
        people_next_page = people_data['next']

    people_df = pd.DataFrame(people_records)

    if save:
        # save to csv if option selected
        filename = 'people.csv'

        if not os.path.exists(filename):
            people_df.to_csv(filename, index=False)
            print(f"Saving {filename}...")
        else:
            print(f"File {filename} already exists.")

    return people_df

In [48]:
def get_planets(save=True):
    """Download every page of the Star Wars API "planets" resource.

    Parameters
    ----------
    save : bool, default True
        When true, write the result to ``planets.csv`` in the working
        directory unless a file with that name already exists.

    Returns
    -------
    pandas.DataFrame
        One row per entry of each page's 'results' list, indexed 0..n-1.

    Raises
    ------
    requests.HTTPError
        If a page answers with an error status.

    Notes
    -----
    The data is always downloaded; an existing ``planets.csv`` is neither
    read nor overwritten.
    """
    # Collect rows in a list and build the frame once; pd.concat inside the
    # loop would re-copy every earlier row on each page.
    planets_records = []
    planets_next_page = get_star_wars_api_url("planets")

    # Each page reports the following page's URL under 'next'; None ends it.
    while planets_next_page is not None:
        planets_response = requests.get(planets_next_page)
        # Surface HTTP errors here instead of as a KeyError on 'results'.
        planets_response.raise_for_status()
        planets_data = planets_response.json()
        planets_records.extend(planets_data['results'])
        planets_next_page = planets_data['next']

    planets_df = pd.DataFrame(planets_records)

    if save:
        # save to csv if option selected
        filename = 'planets.csv'

        if not os.path.exists(filename):
            planets_df.to_csv(filename, index=False)
            print(f"Saving {filename}...")
        else:
            print(f"File {filename} already exists.")
    return planets_df

In [47]:
def get_starships(save=True):
    """Download every page of the Star Wars API "starships" resource.

    Parameters
    ----------
    save : bool, default True
        When true, write the result to ``starships.csv`` in the working
        directory unless a file with that name already exists.

    Returns
    -------
    pandas.DataFrame
        One row per entry of each page's 'results' list, indexed 0..n-1.

    Raises
    ------
    requests.HTTPError
        If a page answers with an error status.

    Notes
    -----
    The data is always downloaded; an existing ``starships.csv`` is neither
    read nor overwritten.
    """
    # Collect rows in a list and build the frame once; pd.concat inside the
    # loop would re-copy every earlier row on each page.
    starships_records = []
    starships_next_page = get_star_wars_api_url("starships")

    # Each page reports the following page's URL under 'next'; None ends it.
    while starships_next_page is not None:
        starships_response = requests.get(starships_next_page)
        # Surface HTTP errors here instead of as a KeyError on 'results'.
        starships_response.raise_for_status()
        starships_data = starships_response.json()
        starships_records.extend(starships_data['results'])
        starships_next_page = starships_data['next']

    starships_df = pd.DataFrame(starships_records)

    if save:
        # save to csv if option selected
        filename = 'starships.csv'

        if not os.path.exists(filename):
            starships_df.to_csv(filename, index=False)
            print(f"Saving {filename}...")
        else:
            print(f"File {filename} already exists.")

    return starships_df

In [46]:
def get_all_starwars(save=True):
    """Fetch people, planets and starships and return them as one DataFrame.

    Parameters
    ----------
    save : bool, default True
        Passed on unchanged to ``get_people``, ``get_planets`` and
        ``get_starships``.

    Returns
    -------
    pandas.DataFrame
        The rows of the three frames stacked in that order, with an added
        "Type" column ("People", "Planet" or "Starship") and a fresh 0..n-1
        index.
    """
    # 3 temporary dataframes to hold each type of data
    people_df = get_people(save)
    planets_df = get_planets(save)
    starships_df = get_starships(save)

    # Tag each row with its source so the combined frame can be filtered.
    people_df["Type"] = "People"
    planets_df["Type"] = "Planet"
    starships_df["Type"] = "Starship"

    # ignore_index=True renumbers the rows; without it each frame keeps its
    # own labels and the combined index would contain duplicates.
    return pd.concat([people_df, planets_df, starships_df], ignore_index=True)

In [None]:
def get_energy(save=True):
    """Download the daily Open Power Systems Data for Germany.

    Parameters
    ----------
    save : bool, default True
        When true, write the result to ``opsd_germany_daily.csv`` in the
        working directory unless a file with that name already exists. The
        default keeps the behaviour of calling ``get_energy()`` with no
        arguments and mirrors the ``save`` flag of the other getters here.

    Returns
    -------
    pandas.DataFrame
        The CSV at the hard-coded URL as parsed by ``pandas.read_csv``.

    Notes
    -----
    The data is always downloaded; an existing local CSV is neither read nor
    overwritten.
    """
    url = "https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv"
    energy_df = pd.read_csv(url)

    if save:
        # save to csv if option selected
        filename = 'opsd_germany_daily.csv'

        if not os.path.exists(filename):
            energy_df.to_csv(filename, index=False)
            print(f"Saving {filename}...")
        else:
            print(f"File {filename} already exists.")
    return energy_df