# ACQUIRE DATA

## Exercises
Create a new local git repository and remote repository on github named time-series-exercises. Save this work for this module in your time-series-exercises repo.

The end result of this exercise should be a file named acquire.py.

## IMPORTS

In [1]:
import pandas as pd
import requests
import math

### 1. Using the code from the lesson as a guide and the REST API from https://swapi.dev/ as we did in the lesson, create a dataframe named people that has all of the data for people.

In [2]:
# This is a URL. The end result are URI
response = requests.get('https://swapi.dev/api/')
response.json()

{'people': 'https://swapi.dev/api/people/',
 'planets': 'https://swapi.dev/api/planets/',
 'films': 'https://swapi.dev/api/films/',
 'species': 'https://swapi.dev/api/species/',
 'vehicles': 'https://swapi.dev/api/vehicles/',
 'starships': 'https://swapi.dev/api/starships/'}

In [3]:
# Focus on just `people` page
response = requests.get('https://swapi.dev/api/people/')
data = response.json()
people_df = pd.DataFrame()

# The range started at 2 because the dataframe already contained the first page. 
for i in range(1, 10):
    response = requests.get(f'https://swapi.dev/api/people/?page={i}')
    data = response.json()
    if len(data.keys())>1:
        people_df = pd.concat([people_df, pd.DataFrame(data['results'])])

people_df = people_df.reset_index()

people_df.to_csv('people.csv')

# # Display the final DataFrame    
people_df.head()

Unnamed: 0,index,name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,films,species,vehicles,starships,created,edited,url
0,0,Luke Skywalker,172,77,blond,fair,blue,19BBY,male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],"[https://swapi.dev/api/vehicles/14/, https://s...","[https://swapi.dev/api/starships/12/, https://...",2014-12-09T13:50:51.644000Z,2014-12-20T21:17:56.891000Z,https://swapi.dev/api/people/1/
1,1,C-3PO,167,75,,gold,yellow,112BBY,,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:10:51.357000Z,2014-12-20T21:17:50.309000Z,https://swapi.dev/api/people/2/
2,2,R2-D2,96,32,,"white, blue",red,33BBY,,https://swapi.dev/api/planets/8/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:11:50.376000Z,2014-12-20T21:17:50.311000Z,https://swapi.dev/api/people/3/
3,3,Darth Vader,202,136,none,white,yellow,41.9BBY,male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[],[https://swapi.dev/api/starships/13/],2014-12-10T15:18:20.704000Z,2014-12-20T21:17:50.313000Z,https://swapi.dev/api/people/4/
4,4,Leia Organa,150,49,brown,light,brown,19BBY,female,https://swapi.dev/api/planets/2/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[https://swapi.dev/api/vehicles/30/],[],2014-12-10T15:20:09.791000Z,2014-12-20T21:17:50.315000Z,https://swapi.dev/api/people/5/


### 2. Do the same thing, but for planets.

In [4]:
# Focus on just `people` page
response = requests.get('https://swapi.dev/api/planets/')
data = response.json()
planets_df = pd.DataFrame()

# The range started at 2 because the dataframe already contained the first page. 
for i in range(1, 10):
    response = requests.get(f'https://swapi.dev/api/planets/?page={i}')
    data = response.json()
    if len(data.keys())>1:
        planets_df = pd.concat([planets_df, pd.DataFrame(data['results'])])

planets_df = planets_df.reset_index()

planets_df.to_csv('planets.csv')

# # Display the final DataFrame    
planets_df.head()

Unnamed: 0,index,name,rotation_period,orbital_period,diameter,climate,gravity,terrain,surface_water,population,residents,films,created,edited,url
0,0,Tatooine,23,304,10465,arid,1 standard,desert,1,200000,"[https://swapi.dev/api/people/1/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-09T13:50:49.641000Z,2014-12-20T20:58:18.411000Z,https://swapi.dev/api/planets/1/
1,1,Alderaan,24,364,12500,temperate,1 standard,"grasslands, mountains",40,2000000000,"[https://swapi.dev/api/people/5/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T11:35:48.479000Z,2014-12-20T20:58:18.420000Z,https://swapi.dev/api/planets/2/
2,2,Yavin IV,24,4818,10200,"temperate, tropical",1 standard,"jungle, rainforests",8,1000,[],[https://swapi.dev/api/films/1/],2014-12-10T11:37:19.144000Z,2014-12-20T20:58:18.421000Z,https://swapi.dev/api/planets/3/
3,3,Hoth,23,549,7200,frozen,1.1 standard,"tundra, ice caves, mountain ranges",100,unknown,[],[https://swapi.dev/api/films/2/],2014-12-10T11:39:13.934000Z,2014-12-20T20:58:18.423000Z,https://swapi.dev/api/planets/4/
4,4,Dagobah,23,341,8900,murky,,"swamp, jungles",8,unknown,[],"[https://swapi.dev/api/films/2/, https://swapi...",2014-12-10T11:42:22.590000Z,2014-12-20T20:58:18.425000Z,https://swapi.dev/api/planets/5/


### 3. Extract the data for starships.

In [5]:
# Focus on just `people` page
response = requests.get('https://swapi.dev/api/starships/')
data = response.json()
starships_df = pd.DataFrame()

# The range started at 2 because the dataframe already contained the first page. 
for i in range(1, 10):
    response = requests.get(f'https://swapi.dev/api/starships/?page={i}')
    data = response.json()
    if len(data.keys())>1:
        starships_df = pd.concat([starships_df, pd.DataFrame(data['results'])])

starships_df = starships_df.reset_index()

starships_df.to_csv('starships.csv')

# # Display the final DataFrame    
starships_df

Unnamed: 0,index,name,model,manufacturer,cost_in_credits,length,max_atmosphering_speed,crew,passengers,cargo_capacity,consumables,hyperdrive_rating,MGLT,starship_class,pilots,films,created,edited,url
0,0,CR90 corvette,CR90 corvette,Corellian Engineering Corporation,3500000,150.0,950,30-165,600,3000000,1 year,2.0,60,corvette,[],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T14:20:33.369000Z,2014-12-20T21:23:49.867000Z,https://swapi.dev/api/starships/2/
1,1,Star Destroyer,Imperial I-class Star Destroyer,Kuat Drive Yards,150000000,1600.0,975,47060,,36000000,2 years,2.0,60,Star Destroyer,[],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T15:08:19.848000Z,2014-12-20T21:23:49.870000Z,https://swapi.dev/api/starships/3/
2,2,Sentinel-class landing craft,Sentinel-class landing craft,"Sienar Fleet Systems, Cyngus Spaceworks",240000,38.0,1000,5,75,180000,1 month,1.0,70,landing craft,[],[https://swapi.dev/api/films/1/],2014-12-10T15:48:00.586000Z,2014-12-20T21:23:49.873000Z,https://swapi.dev/api/starships/5/
3,3,Death Star,DS-1 Orbital Battle Station,"Imperial Department of Military Research, Sien...",1000000000000,120000.0,,342953,843342,1000000000000,3 years,4.0,10,Deep Space Mobile Battlestation,[],[https://swapi.dev/api/films/1/],2014-12-10T16:36:50.509000Z,2014-12-20T21:26:24.783000Z,https://swapi.dev/api/starships/9/
4,4,Millennium Falcon,YT-1300 light freighter,Corellian Engineering Corporation,100000,34.37,1050,4,6,100000,2 months,0.5,75,Light freighter,"[https://swapi.dev/api/people/13/, https://swa...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T16:59:45.094000Z,2014-12-20T21:23:49.880000Z,https://swapi.dev/api/starships/10/
5,5,Y-wing,BTL Y-wing,Koensayr Manufacturing,134999,14.0,1000km,2,0,110,1 week,1.0,80,assault starfighter,[],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-12T11:00:39.817000Z,2014-12-20T21:23:49.883000Z,https://swapi.dev/api/starships/11/
6,6,X-wing,T-65 X-wing,Incom Corporation,149999,12.5,1050,1,0,110,1 week,1.0,100,Starfighter,"[https://swapi.dev/api/people/1/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-12T11:19:05.340000Z,2014-12-20T21:23:49.886000Z,https://swapi.dev/api/starships/12/
7,7,TIE Advanced x1,Twin Ion Engine Advanced x1,Sienar Fleet Systems,unknown,9.2,1200,1,0,150,5 days,1.0,105,Starfighter,[https://swapi.dev/api/people/4/],[https://swapi.dev/api/films/1/],2014-12-12T11:21:32.991000Z,2014-12-20T21:23:49.889000Z,https://swapi.dev/api/starships/13/
8,8,Executor,Executor-class star dreadnought,"Kuat Drive Yards, Fondor Shipyards",1143350000,19000.0,,279144,38000,250000000,6 years,2.0,40,Star dreadnought,[],"[https://swapi.dev/api/films/2/, https://swapi...",2014-12-15T12:31:42.547000Z,2014-12-20T21:23:49.893000Z,https://swapi.dev/api/starships/15/
9,9,Rebel transport,GR-75 medium transport,"Gallofree Yards, Inc.",unknown,90.0,650,6,90,19000000,6 months,4.0,20,Medium transport,[],"[https://swapi.dev/api/films/2/, https://swapi...",2014-12-15T12:34:52.264000Z,2014-12-20T21:23:49.895000Z,https://swapi.dev/api/starships/17/


### 4. Save the data in your files to local csv files so that it will be faster to access in the future.

In [6]:
# people_df.to_csv('people.csv')
# planets_df.to_csv('planets.csv')
# starships_df.to_csv('starships.csv')

### 5. Combine the data from your three separate dataframes into one large dataframe.

In [8]:
# Load the CSV files into DataFrames
people_df = pd.read_csv('people.csv')
planets_df = pd.read_csv('planets.csv')
starships_df = pd.read_csv('starships.csv')

# Perform the concatenation based on matching values in 'homeworld' and 'url' columns
merged_df = pd.merge(people_df, planets_df, left_on='homeworld', right_on='url', how='left')
merged_df['starships'] = merged_df['starships'].str.replace(r"\[|\]|'", "", regex=True)
merged_df = pd.merge(merged_df, starships_df, left_on='starships', right_on='url', how='left')
merged_df

merged_df

Unnamed: 0,Unnamed: 0_x,index_x,name_x,height,mass,hair_color,skin_color,eye_color,birth_year,gender,...,cargo_capacity,consumables,hyperdrive_rating,MGLT,starship_class,pilots,films,created,edited,url
0,0,0,Luke Skywalker,172,77,blond,fair,blue,19BBY,male,...,,,,,,,,,,
1,1,1,C-3PO,167,75,,gold,yellow,112BBY,,...,,,,,,,,,,
2,2,2,R2-D2,96,32,,"white, blue",red,33BBY,,...,,,,,,,,,,
3,3,3,Darth Vader,202,136,none,white,yellow,41.9BBY,male,...,150,5 days,1.0,105,Starfighter,['https://swapi.dev/api/people/4/'],['https://swapi.dev/api/films/1/'],2014-12-12T11:21:32.991000Z,2014-12-20T21:23:49.889000Z,https://swapi.dev/api/starships/13/
4,4,4,Leia Organa,150,49,brown,light,brown,19BBY,female,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,77,7,Grievous,216,159,none,"brown, white","green, yellow",unknown,male,...,140,7 days,6,unknown,starfighter,"['https://swapi.dev/api/people/10/', 'https://...",['https://swapi.dev/api/films/6/'],2014-12-20T20:38:05.031000Z,2014-12-20T21:23:49.959000Z,https://swapi.dev/api/starships/74/
78,78,8,Tarfful,234,136,brown,brown,blue,unknown,male,...,,,,,,,,,,
79,79,9,Raymus Antilles,188,79,brown,light,brown,unknown,male,...,,,,,,,,,,
80,80,0,Sly Moore,178,48,none,pale,white,unknown,female,...,,,,,,,,,,


### 6. Acquire the Open Power Systems Data for Germany, which has been rapidly expanding its renewable energy production in recent years. The data set includes country-wide totals of electricity consumption, wind power production, and solar power production for 2006-2017. You can get the data here: https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv

In [12]:
# Download the CSV file
url = 'https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv'
response = requests.get(url)

# Save the content of the response as a DataFrame
ops_df = pd.read_csv(url)

# Save the DataFrame to a CSV file
ops_df.to_csv('open_power_sys.csv', index=False)

# Display the first few rows of the DataFrame
ops_df

Unnamed: 0,Date,Consumption,Wind,Solar,Wind+Solar
0,2006-01-01,1069.18400,,,
1,2006-01-02,1380.52100,,,
2,2006-01-03,1442.53300,,,
3,2006-01-04,1457.21700,,,
4,2006-01-05,1477.13100,,,
...,...,...,...,...,...
4378,2017-12-27,1263.94091,394.507,16.530,411.037
4379,2017-12-28,1299.86398,506.424,14.162,520.586
4380,2017-12-29,1295.08753,584.277,29.854,614.131
4381,2017-12-30,1215.44897,721.247,7.467,728.714


### 7. Make sure all the work that you have done above is reproducible. That is, you should put the code above into separate functions in the acquire.py file and be able to re-run the functions and get the same data.

In [13]:
def download_data(url):
    response = requests.get(url)
    return response.content

def save_csv(data, filepath):
    with open(filepath, 'wb') as file:
        file.write(data)

def load_csv(filepath):
    return pd.read_csv(filepath)

def main():
    url = 'https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv'
    filepath = 'open_power_sys.csv'

    # Download the data
    data = download_data(url)

    # Save the data to a CSV file
    save_csv(data, filepath)

    # Load the data from the CSV file
    ops_df = load_csv(filepath)

    # Display the first few rows of the DataFrame
    print(ops_df.head())

if __name__ == '__main__':
    main()

         Date  Consumption  Wind  Solar  Wind+Solar
0  2006-01-01     1069.184   NaN    NaN         NaN
1  2006-01-02     1380.521   NaN    NaN         NaN
2  2006-01-03     1442.533   NaN    NaN         NaN
3  2006-01-04     1457.217   NaN    NaN         NaN
4  2006-01-05     1477.131   NaN    NaN         NaN
