In [54]:
import requests
import json
import pandas as pd
import numpy as np
import re
from azure.storage.blob import BlobServiceClient
pd.options.mode.chained_assignment = None

In [2]:
# get data from SWAPI.dev

def get_data(name: str, size: int) -> pd.DataFrame:
    
    records = []
    
    for i in range(1, size):
        url = f'https://swapi.dev/api/{name}/{i}/'
        r = requests.get(url)
        data = r.json()
        records.append(data)
    
    df = pd.DataFrame.from_records(records)
    
    return df

In [3]:
chars_help_raw = get_data('people', 84) # charaters helper dataframe since the big one has some characters missing in it
species_raw = get_data('species', 38)
planets_raw = get_data('planets', 61)
starships_raw = get_data('starships', 18)
vehicles_raw = get_data('vehicles', 63)
chars_raw = pd.read_parquet('StarWars_Characters.parquet')

In [4]:
# function to insert id column for future joins

def insert_id_col(df: pd.DataFrame) -> pd.DataFrame:
    
    lenght = len(df)+1
    df.reset_index(drop=True, inplace=True)
    df.insert(0, 'id', range(1, lenght))
    df = df.set_index('id')
    
    return df

In [5]:
chars_raw.head(3)

Unnamed: 0,name,key,url,description,species_2nd,species_3rd,species,home_world,gender,height,eye_color,skin_color,hair_color,weight
0,1138 (First Order),1138_(First_Order),https://starwars.fandom.com/wiki/1138_(First_O...,1138 was a human male stormtrooper who served ...,,,Human,,Male,,,,,
1,1151,1151,https://starwars.fandom.com/wiki/1151,1151 was a clone cadet who was a part of the C...,,,Human,Kamino,Male,,Brown,Tan,Black,
2,1174,1174,https://starwars.fandom.com/wiki/1174,"1174, nicknamed ""Whiplash,"" was a clone troope...",,,Human,Kamino,Male,,Brown,Tan,Black,


In [6]:
species_raw.head(3)

Unnamed: 0,name,classification,designation,average_height,skin_colors,hair_colors,eye_colors,average_lifespan,homeworld,language,people,films,created,edited,url
0,Human,mammal,sentient,180.0,"caucasian, black, asian, hispanic","blonde, brown, black, red","brown, blue, green, hazel, grey, amber",120,https://swapi.dev/api/planets/9/,Galactic Basic,"[https://swapi.dev/api/people/66/, https://swa...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T13:52:11.567000Z,2014-12-20T21:36:42.136000Z,https://swapi.dev/api/species/1/
1,Droid,artificial,sentient,,,,,indefinite,,,"[https://swapi.dev/api/people/2/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T15:16:16.259000Z,2014-12-20T21:36:42.139000Z,https://swapi.dev/api/species/2/
2,Wookie,mammal,sentient,210.0,gray,"black, brown","blue, green, yellow, brown, golden, red",400,https://swapi.dev/api/planets/14/,Shyriiwook,"[https://swapi.dev/api/people/13/, https://swa...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T16:44:31.486000Z,2014-12-20T21:36:42.142000Z,https://swapi.dev/api/species/3/


In [7]:
planets_raw.head(3)

Unnamed: 0,name,rotation_period,orbital_period,diameter,climate,gravity,terrain,surface_water,population,residents,films,created,edited,url
0,Tatooine,23,304,10465,arid,1 standard,desert,1,200000,"[https://swapi.dev/api/people/1/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-09T13:50:49.641000Z,2014-12-20T20:58:18.411000Z,https://swapi.dev/api/planets/1/
1,Alderaan,24,364,12500,temperate,1 standard,"grasslands, mountains",40,2000000000,"[https://swapi.dev/api/people/5/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T11:35:48.479000Z,2014-12-20T20:58:18.420000Z,https://swapi.dev/api/planets/2/
2,Yavin IV,24,4818,10200,"temperate, tropical",1 standard,"jungle, rainforests",8,1000,[],[https://swapi.dev/api/films/1/],2014-12-10T11:37:19.144000Z,2014-12-20T20:58:18.421000Z,https://swapi.dev/api/planets/3/


In [8]:
starships_raw.head(3)

Unnamed: 0,detail,name,model,manufacturer,cost_in_credits,length,max_atmosphering_speed,crew,passengers,cargo_capacity,consumables,hyperdrive_rating,MGLT,starship_class,pilots,films,created,edited,url
0,Not found,,,,,,,,,,,,,,,,,,
1,,CR90 corvette,CR90 corvette,Corellian Engineering Corporation,3500000.0,150.0,950.0,30-165,600.0,3000000.0,1 year,2.0,60.0,corvette,[],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T14:20:33.369000Z,2014-12-20T21:23:49.867000Z,https://swapi.dev/api/starships/2/
2,,Star Destroyer,Imperial I-class Star Destroyer,Kuat Drive Yards,150000000.0,1600.0,975.0,47060,,36000000.0,2 years,2.0,60.0,Star Destroyer,[],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T15:08:19.848000Z,2014-12-20T21:23:49.870000Z,https://swapi.dev/api/starships/3/


In [9]:
vehicles_raw.head(3)

Unnamed: 0,detail,name,model,manufacturer,cost_in_credits,length,max_atmosphering_speed,crew,passengers,cargo_capacity,consumables,vehicle_class,pilots,films,created,edited,url
0,Not found,,,,,,,,,,,,,,,,
1,Not found,,,,,,,,,,,,,,,,
2,Not found,,,,,,,,,,,,,,,,


### Characters dataframe

In [61]:
# drop unwanted columns, fill missing values with 'None'

chars_col_drop = chars_raw.drop(['key', 'url', 'description', 'species_2nd', 'species_3rd'], axis=1)
chars_col_and_na_drop = chars_col_drop.fillna('None') 
characters_df = chars_col_and_na_drop.copy()

In [62]:
characters_df.isna().sum()

name          0
species       0
home_world    0
gender        0
height        0
eye_color     0
skin_color    0
hair_color    0
weight        0
dtype: int64

In [63]:
# drop unwanted columns from helper df and drop missing values

chars_help_drop_col = chars_help_raw.drop(['films', 'vehicles', 'starships',
                                          'created', 'edited', 'url', 'detail'], axis=1)

chars_help_drop_col_dropna = chars_help_drop_col.dropna()

In [64]:
# clean helper df, map species

chars_help_drop_col_dropna['homeworld'] = chars_help_drop_col_dropna['homeworld'].str.replace('https://swapi.dev/api/planets/', "")
chars_help_drop_col_dropna['homeworld'] = chars_help_drop_col_dropna['homeworld'].str.rstrip('/')

chars_help_worlds_map = chars_help_drop_col_dropna.copy()
chars_help_worlds_map['species'] = chars_help_worlds_map['species'].map(lambda x: str(x)[32:-3])

  chars_help_drop_col_dropna['homeworld'] = chars_help_drop_col_dropna['homeworld'].str.replace('https://swapi.dev/api/planets/', "")


In [65]:
# insert id column

species_with_id = insert_id_col(species_raw)

ValueError: cannot insert id, already exists

In [66]:
#make scpecies dictionary

species_dict = pd.Series(species_with_id['name'].values, index=species_with_id.index).to_dict()

chars_help_worlds_map['species'] = chars_help_worlds_map['species'].replace('', '1')
chars_help_worlds_map['species'] = chars_help_worlds_map['species'].astype('int')
chars_help_worlds_map['species'] = chars_help_worlds_map['species'].map(species_dict)

In [67]:
# make planets dictionary and map planets in helper df

planets_with_id = insert_id_col(planets_raw)
planets_dict = pd.Series(planets_with_id['name'].values, index=planets_with_id.index).to_dict()
planets_dict[0] = 'None'

chars_help_worlds_map['homeworld'] = chars_help_worlds_map['homeworld'].astype('int')
chars_help_worlds_map['homeworld'] = chars_help_worlds_map['homeworld'].map(planets_dict)

ValueError: cannot insert id, already exists

In [68]:
# clean helper df

chars_help_worlds_map['gender'] = chars_help_worlds_map['gender'].str.replace('n/a', 'None')
chars_help_worlds_map['gender'] = chars_help_worlds_map['gender'].str.replace('none', 'None')
chars_help_worlds_map['hair_color'] = chars_help_worlds_map['hair_color'].str.replace('n/a', 'None')
chars_help_worlds_map['hair_color'] = chars_help_worlds_map['hair_color'].str.replace('none', 'None')

In [69]:
chars_help_clean = chars_help_worlds_map.copy()

chars_help_clean.columns = ['name', 'height', 'weight', 'hair_color', 
                            'skin_color', 'eye_color', 'birth_year', 
                            'gender', 'home_world', 'species']

cols = characters_df.columns.to_list()
chars_help_df = chars_help_clean[cols]

In [70]:
# concat main characters df with helper df

chars_combine = pd.concat([characters_df, chars_help_df], axis=0)
chars_combine = chars_combine.reset_index(drop=True)

chars_combine['gender'] = chars_combine['gender'].apply(lambda x: x.title())
chars_combine['eye_color'] = chars_combine['eye_color'].apply(lambda x: x.title())
chars_combine['skin_color'] = chars_combine['skin_color'].apply(lambda x: x.title())
chars_combine['hair_color'] = chars_combine['hair_color'].apply(lambda x: x.title())

In [71]:
# clean weight and height

chars_combine[['weight', 'height']] = chars_combine[['weight', 'height']].replace('unknown', '0')
chars_combine[['weight', 'height']] = chars_combine[['weight', 'height']].replace('None', '0')
chars_combine['height'] = chars_combine['height'].str.replace(r'.', r'', regex=False)
chars_combine['weight'] = chars_combine['weight'].str.replace(r'.', r'', regex=False)
chars_combine['weight'] = chars_combine['weight'].str.replace(r',', r'', regex=False)
chars_combine['height'] = chars_combine['height'].fillna('0', axis=0)
chars_combine['weight'] = chars_combine['weight'].apply(lambda x: x.strip())

chars_combine['weight'] = chars_combine['weight'].apply(lambda x: ''.join(letter for letter in x.split() if x.isdigit()))
# ^ creates ''
chars_combine.loc[chars_combine['weight'] == ''] = '0'

chars_combine['height'] = chars_combine['height'].apply(lambda x: ''.join(letter for letter in x.split() if x.isdigit()))

In [72]:
# convert weight and height to int

chars_combine[['weight', 'height']] = chars_combine[['weight', 'height']].astype('int')

In [73]:
chars_combine.dtypes

name          object
species       object
home_world    object
gender        object
height         int32
eye_color     object
skin_color    object
hair_color    object
weight         int32
dtype: object

In [74]:
# clean errors

chars_with_zeros = chars_combine.loc[chars_combine['name'] == '0'].index

In [75]:
chars_clean = chars_combine.drop(labels=chars_with_zeros, axis=0)

In [76]:
# insert id column

chars_df_final = insert_id_col(chars_clean)
chars_df_final.shape

(5340, 9)

### Species dataframe

In [26]:
species_raw.sample(5)

Unnamed: 0,id,name,classification,designation,average_height,skin_colors,hair_colors,eye_colors,average_lifespan,homeworld,language,people,films,created,edited,url
5,6,Yoda's species,mammal,sentient,66.0,"green, yellow","brown, white","brown, green, yellow",900,https://swapi.dev/api/planets/28/,Galactic basic,[https://swapi.dev/api/people/20/],"[https://swapi.dev/api/films/2/, https://swapi...",2014-12-15T12:27:22.877000Z,2014-12-20T21:36:42.148000Z,https://swapi.dev/api/species/6/
27,28,Geonosian,insectoid,sentient,178.0,"green, brown",none,"green, hazel",unknown,https://swapi.dev/api/planets/11/,Geonosian,[https://swapi.dev/api/people/63/],"[https://swapi.dev/api/films/5/, https://swapi...",2014-12-20T16:40:45.618000Z,2014-12-20T21:36:42.195000Z,https://swapi.dev/api/species/28/
11,12,Gungan,amphibian,sentient,190.0,"brown, green",none,orange,unknown,https://swapi.dev/api/planets/8/,Gungan basic,"[https://swapi.dev/api/people/36/, https://swa...","[https://swapi.dev/api/films/4/, https://swapi...",2014-12-19T17:30:37.341000Z,2014-12-20T21:36:42.163000Z,https://swapi.dev/api/species/12/
16,17,Vulptereen,unknown,sentient,100.0,grey,none,yellow,unknown,https://swapi.dev/api/planets/39/,vulpterish,[https://swapi.dev/api/people/48/],[https://swapi.dev/api/films/4/],2014-12-20T09:57:33.128000Z,2014-12-20T21:36:42.173000Z,https://swapi.dev/api/species/17/
1,2,Droid,artificial,sentient,,,,,indefinite,,,"[https://swapi.dev/api/people/2/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T15:16:16.259000Z,2014-12-20T21:36:42.139000Z,https://swapi.dev/api/species/2/


In [27]:
# drop unwanted columns, clean data and fill missing values

species_drop = species_raw.drop(['people', 'films', 'created', 'edited', 'url'], axis=1)

species_drop['homeworld'] = species_drop['homeworld'].str.replace('https://swapi.dev/api/planets/', '')
species_drop['homeworld'] = species_drop['homeworld'].str.rstrip('/')

  species_drop['homeworld'] = species_drop['homeworld'].str.replace('https://swapi.dev/api/planets/', '')


In [28]:
species_drop['homeworld'].fillna('0', axis=0, inplace=True)
species_drop['homeworld'] = species_drop['homeworld'].astype('int').map(planets_dict)

In [29]:
species_mapped = species_drop.copy()

In [30]:
species_mapped['average_height'].replace(['unknown', 'n/a'], '0', inplace=True)
species_mapped['average_height'] = species_mapped['average_height'].astype('int')

species_mapped['average_lifespan'].replace('unknown', '0', inplace=True)
species_mapped['average_lifespan'].replace('indefinite', '9999', inplace=True)
species_mapped['average_lifespan'] = species_mapped['average_lifespan'].astype('int')

In [31]:
species_fillna = species_mapped.copy()

In [32]:
species_fillna['skin_colors'].replace('n/a', 'none', inplace=True)
species_fillna['hair_colors'].replace('n/a', 'none', inplace=True)
species_fillna['eye_colors'].replace('n/a', 'none', inplace=True)
species_fillna['language'].replace('n/a', 'none', inplace=True)

In [33]:
for col in species_fillna.columns:
    if species_fillna[col].dtype == 'object':
        species_fillna[col] = species_fillna[col].apply(lambda x: x.title())

In [77]:
# insert id column in species final df

species_df = species_fillna.copy()
species_df.drop('id', axis=1, inplace=True)
species_df_final = insert_id_col(species_df)
species_df_final.shape

(37, 10)

### Planets dataframe

In [35]:
planets_with_id.head(3)

Unnamed: 0_level_0,name,rotation_period,orbital_period,diameter,climate,gravity,terrain,surface_water,population,residents,films,created,edited,url
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,Tatooine,23,304,10465,arid,1 standard,desert,1,200000,"[https://swapi.dev/api/people/1/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-09T13:50:49.641000Z,2014-12-20T20:58:18.411000Z,https://swapi.dev/api/planets/1/
2,Alderaan,24,364,12500,temperate,1 standard,"grasslands, mountains",40,2000000000,"[https://swapi.dev/api/people/5/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T11:35:48.479000Z,2014-12-20T20:58:18.420000Z,https://swapi.dev/api/planets/2/
3,Yavin IV,24,4818,10200,"temperate, tropical",1 standard,"jungle, rainforests",8,1000,[],[https://swapi.dev/api/films/1/],2014-12-10T11:37:19.144000Z,2014-12-20T20:58:18.421000Z,https://swapi.dev/api/planets/3/


In [36]:
planets_drop = planets_with_id.drop(['residents', 'films', 'created', 'edited', 'url'], axis=1)

In [37]:
# drop this record

planets_drop[planets_drop['rotation_period'] == '0']

Unnamed: 0_level_0,name,rotation_period,orbital_period,diameter,climate,gravity,terrain,surface_water,population
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
28,unknown,0,0,0,unknown,unknown,unknown,unknown,unknown


In [38]:
planets_drop.drop([28], axis=0, inplace=True)

In [39]:
# clean data

planet_unknown_cols = ['rotation_period', 'orbital_period', 'diameter', 'surface_water', 'population']

for col in planet_unknown_cols:
    planets_drop[col].replace('unknown', '0', inplace=True)
    
planet_cols_to_int = ['rotation_period', 'orbital_period', 'diameter']

for col in planet_cols_to_int:
    planets_drop[col] = planets_drop[col].astype('int')

planets_drop['surface_water'] = planets_drop['surface_water'].astype('float')

In [40]:
# first convert population to float, int too large

planets_drop['population'] = planets_drop['population'].astype('float').apply(lambda x: x / 1000)
planets_drop['population'] = planets_drop['population'].astype('int')

planets_drop.rename({'population': 'population (thousands)'}, axis=1, inplace=True)

In [79]:
# insert id column

planets_df = planets_drop.copy()
planets_df_final = insert_id_col(planets_df)
planets_df_final.shape

(59, 9)

### Starships dataframe

In [42]:
starships_raw.head(3)

Unnamed: 0,detail,name,model,manufacturer,cost_in_credits,length,max_atmosphering_speed,crew,passengers,cargo_capacity,consumables,hyperdrive_rating,MGLT,starship_class,pilots,films,created,edited,url
0,Not found,,,,,,,,,,,,,,,,,,
1,,CR90 corvette,CR90 corvette,Corellian Engineering Corporation,3500000.0,150.0,950.0,30-165,600.0,3000000.0,1 year,2.0,60.0,corvette,[],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T14:20:33.369000Z,2014-12-20T21:23:49.867000Z,https://swapi.dev/api/starships/2/
2,,Star Destroyer,Imperial I-class Star Destroyer,Kuat Drive Yards,150000000.0,1600.0,975.0,47060,,36000000.0,2 years,2.0,60.0,Star Destroyer,[],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T15:08:19.848000Z,2014-12-20T21:23:49.870000Z,https://swapi.dev/api/starships/3/


In [43]:
# drop unwanted columns and missing values

starships_drop = starships_raw.drop(['detail', 'pilots', 'films', 'created', 'edited', 'url'], axis=1)
starships_drop.loc[starships_drop['name'].isna()]

Unnamed: 0,name,model,manufacturer,cost_in_credits,length,max_atmosphering_speed,crew,passengers,cargo_capacity,consumables,hyperdrive_rating,MGLT,starship_class
0,,,,,,,,,,,,,
3,,,,,,,,,,,,,
5,,,,,,,,,,,,,
6,,,,,,,,,,,,,
7,,,,,,,,,,,,,
13,,,,,,,,,,,,,
15,,,,,,,,,,,,,


In [44]:
starships_drop.dropna(axis=0, how='all', inplace=True)

In [45]:
# clean the data and change some column types to numerics

starships_drop['cost_in_credits'].replace('unknown', '0', inplace=True)
starships_drop['length'] = starships_drop['length'].str.replace(',', '.', regex=False)
starships_drop['length'] = starships_drop['length'].str.replace('.', '', regex=False)
starships_drop['max_atmosphering_speed'].replace('n/a', '0', inplace=True)
starships_drop['crew'] = starships_drop['crew'].str.replace(',', '')
starships_drop['passengers'] = starships_drop['passengers'].str.replace('n/a', '0')
starships_drop['passengers'] = starships_drop['passengers'].str.replace(',', '')
starships_drop[['crew'][0]][1] = '165'
starships_drop[['max_atmosphering_speed'][0]][10] = '1000'

In [46]:
starships_astype = starships_drop.copy()

In [47]:
starships_col_to_int = ['length', 'max_atmosphering_speed', 
                        'crew', 'passengers', 'MGLT']

starships_col_to_float = ['cost_in_credits', 'cargo_capacity']

starships_astype[starships_col_to_int] = starships_astype[starships_col_to_int].astype('int')
starships_astype[starships_col_to_float] = starships_astype[starships_col_to_float].astype('float')

In [48]:
starships_astype.dtypes

name                       object
model                      object
manufacturer               object
cost_in_credits           float64
length                      int32
max_atmosphering_speed      int32
crew                        int32
passengers                  int32
cargo_capacity            float64
consumables                object
hyperdrive_rating          object
MGLT                        int32
starship_class             object
dtype: object

In [80]:
# insert id column

starships_df = starships_astype.rename({'crew': 'crew_max'}, axis=1)
starships_df_final = insert_id_col(starships_df)
starships_df_final.shape

(10, 13)

### Vehicles dataframe

In [82]:
vehicles_raw.head()

Unnamed: 0,detail,name,model,manufacturer,cost_in_credits,length,max_atmosphering_speed,crew,passengers,cargo_capacity,consumables,vehicle_class,pilots,films,created,edited,url
0,Not found,,,,,,,,,,,,,,,,
1,Not found,,,,,,,,,,,,,,,,
2,Not found,,,,,,,,,,,,,,,,
3,,Sand Crawler,Digger Crawler,Corellia Mining Corporation,150000.0,36.8,30.0,46.0,30.0,50000.0,2 months,wheeled,[],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T15:36:25.724000Z,2014-12-20T21:30:21.661000Z,https://swapi.dev/api/vehicles/4/
4,Not found,,,,,,,,,,,,,,,,


In [83]:
# drop unwanted columns, fill missing values and cast numeric columns as int and float

vehicles_drop = vehicles_raw.drop(['detail', 'pilots' , 'films' , 'created' , 'edited' , 'url'], axis=1)
vehicles_drop.dropna(inplace=True)

vehicles_unknown_cols = ['cost_in_credits', 'length', 'max_atmosphering_speed', 'crew', 'passengers', 'cargo_capacity']
vehicles_int_cols = ['cost_in_credits', 'max_atmosphering_speed', 'crew', 'passengers', 'cargo_capacity']
for col in vehicles_unknown_cols:
    vehicles_drop[col] = vehicles_drop[col].str.replace('unknown', '0')
    
vehicles_drop['cargo_capacity'].replace('none', '0', inplace=True)

vehicles_drop[vehicles_int_cols] = vehicles_drop[vehicles_int_cols].astype('int')
vehicles_drop['length'] = vehicles_drop['length'].astype('float')

In [84]:
vehicles_df_final = insert_id_col(vehicles_drop)
vehicles_df_final.shape

(32, 11)

## Send CSV to Azure Blob

In [58]:
# provide connection string to Azure Blob Storage Account
connection_string: '****'

In [57]:
# function for uploading csv's

def upload_csv_to_blob(connection_string: str, container_name: str, df: pd.DataFrame):

    
    # Instantiate a new BlobServiceClient using a connection string
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)

    # Instantiate a new ContainerClient
    container_client = blob_service_client.get_container_client(f'{container_name}')
    try:
       # Create new Container in the service
       container_client.create_container()
       properties = container_client.get_container_properties()
    except ResourceExistsError:
       print("Container already exists.")

    output = df.to_csv(index_label='id', encoding='utf-8')
    
    # Instantiate a new BlobClient
    blob_client = container_client.get_blob_client(f"{container_name}.csv")
    
    # upload data
    blob_client.upload_blob(output, blob_type="BlockBlob")

In [85]:
# upload csv's to blob storage

upload_csv_to_blob(connection_string=connection_string, container_name='characters', df=chars_df_final)
upload_csv_to_blob(connection_string=connection_string, container_name='species', df=species_df_final)
upload_csv_to_blob(connection_string=connection_string, container_name='planets', df=planets_df_final)
upload_csv_to_blob(connection_string=connection_string, container_name='starships', df=starships_df_final)
upload_csv_to_blob(connection_string=connection_string, container_name='vehicles', df=vehicles_df_final)