In [9]:
#Dependencies
import pandas as pd 
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
import json
import config 
# API key is read from the local `config` module so it is not hardcoded in the notebook.
api_key = config.coin_api_key
#get the current datetime to add to the file
from datetime import datetime
# One shared timestamp so both strings below describe the same instant.
now = datetime.now()
# Full ingest timestamp (date and time).
injest_date = format(now, "%m/%d/%Y %H:%M:%S")
# Date-only rendering of the same timestamp.
filedate = format(now, "%m/%d/%Y")

In [10]:
#API call to CoinMarketCap
def api_mapping_call(number_of_entries, api_key, timeout=30):
    """Fetch the CoinMarketCap ID map for active cryptocurrencies.

    Sends a GET request to the ``/v1/cryptocurrency/map`` endpoint asking for
    active listings sorted by ``cmc_rank``.

    Args:
        number_of_entries: Value sent as the ``limit`` query parameter.
        api_key: Value sent in the ``X-CMC_PRO_API_KEY`` request header.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block forever on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: The server replied with a 4xx/5xx status.
        requests.exceptions.RequestException: Connection problems or timeouts.
        ValueError: The response body is not valid JSON.
    """
    url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/map'
    parameters = {
        'listing_status': 'active',
        'limit': number_of_entries,
        'sort': 'cmc_rank'
    }
    headers = {
        # The standard HTTP header is "Accept"; "Accepts" is not a recognised header.
        'Accept': 'application/json',
        'X-CMC_PRO_API_KEY': api_key
    }

    # The context manager closes the session's pooled connections on exit.
    with Session() as session:
        session.headers.update(headers)
        response = session.get(url, params=parameters, timeout=timeout)
        # Fail here on an error status instead of returning an error payload
        # that only breaks later when the caller looks for the 'data' key.
        response.raise_for_status()
        return response.json()


In [11]:
# Request 100 entries from the mapping endpoint using the configured API key.
data = api_mapping_call(number_of_entries=100, api_key=api_key)

In [12]:
#pull the list of coin records out of the API response
cmc_data = data['data']
#build a DataFrame from the records; the nested platform column is expanded below
cmc_df = pd.DataFrame(cmc_data)
#turn each platform entry into its own set of columns
expanded_df = cmc_df['platform'].apply(pd.Series)
#original names of the expanded columns
cols = expanded_df.columns
#same names with an "_expanded" suffix, so they do not duplicate existing columns (e.g. a second id column)
cols_new = [col_name + "_expanded" for col_name in cols]
#old-name -> new-name lookup
mapping = dict(zip(cols, cols_new))
#apply the new names
expanded_df = expanded_df.rename(columns=mapping)
#swap the nested platform column for its expanded columns, then tag every row
#with the file name and the ingest timestamp
top_coins_df = pd.concat(
    [cmc_df.drop(columns=['platform']), expanded_df], axis=1
).assign(File_Name="Coin_Ranking_Dim", injest_datetime=injest_date)
#serialise the DataFrame to CSV text without the index column
top_coins_csv = top_coins_df.to_csv(index=False)


 - Write the DataFrame to Parquet format:
   `trial_write_to_parquet = test.to_parquet(headers = 'none')`
 - Write the DataFrame to CSV format:
   `dfasCSV = test.to_csv()`
 - Write the DataFrame to string format:
   `dfAsString = test.to_string(header=False, index=False)`


In [13]:
#Uploading works when the dataset is sent as text,
#i.e. the DataFrame has to be saved as a CSV string first.
from azure.storage.filedatalake import DataLakeServiceClient
# install the following package 
# pip install azure-storage-file-datalake 
# Get the below details from your storage account
# Storage account name and access key are read from the local `config` module.
storage_account_name = config.storage_account_name
storage_account_key = config.storage_account_key
# Container and directory names passed to write_to_storage for the first upload.
container_name = "coinlist"
directory_name = "raw"

def write_to_storage(storage_account_name,storage_account_key, container_name, directory_name,dataset,  file_name):
    """Upload CSV text to Azure Data Lake Storage and return it as a DataFrame.

    The container is created if it does not exist yet; otherwise the existing
    container is used. The file is written into ``directory_name`` under the
    name ``<title>_<MM_DD_YYYY_HH:MM:SS>.<type>``, where the timestamp is taken
    at the moment the function runs.

    Args:
        storage_account_name: Name of the storage account (used to build the
            ``https://<name>.dfs.core.windows.net`` URL).
        storage_account_key: Credential passed to the service client.
        container_name: Container (file system) to write into.
        directory_name: Directory inside the container.
        dataset: CSV content as a ``str`` (e.g. the result of
            ``DataFrame.to_csv(index=False)``).
        file_name: Target file name; must contain a ``.`` separating the title
            from the file type. Only the last ``.`` is treated as the separator.

    Returns:
        A DataFrame parsed from ``dataset``. The first CSV row becomes the
        header and every value is kept as a string (empty fields stay ``''``).

    Raises:
        ValueError: If ``file_name`` contains no ``.``.
    """
    import io  # local import keeps this function self-contained in the notebook

    # Parse with a real CSV reader so quoted fields containing commas and both
    # "\n" and "\r\n" line endings are handled. dtype=str / keep_default_na=False
    # keep every value as the literal text found in the CSV.
    if dataset.strip():
        df = pd.read_csv(io.StringIO(dataset), dtype=str, keep_default_na=False)
    else:
        df = pd.DataFrame()

    # Timestamp taken when the function runs, so repeated calls get distinct names.
    injest_date = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
    # Spaces and slashes are replaced; a slash would create a nested folder in ADLS.
    trimmed_injest_Date = injest_date.replace(" ", "_").replace("/", "_")
    # Split on the last dot only, so titles that contain dots are accepted.
    file_name_title, separator, file_type = file_name.rpartition('.')
    if not separator:
        raise ValueError(
            "file_name must contain a '.' followed by the file type, got {!r}".format(file_name))
    # File name as it will appear in ADLS.
    storage_file = ("{}_{}.{}").format(file_name_title, trimmed_injest_Date, file_type)

    # Upload bytes, not str: the append/flush lengths are byte counts, which
    # differ from len(str) as soon as the text contains non-ASCII characters.
    payload = dataset.encode("utf-8")

    service_client = DataLakeServiceClient(account_url="{}://{}.dfs.core.windows.net".format(
            "https", storage_account_name), credential=storage_account_key)
    try:
        file_system_client = service_client.create_file_system(file_system=container_name)
    except Exception:
        # Most likely the container already exists. Only the creation call is
        # guarded: fetching the client makes no request, so any genuine problem
        # (bad credentials, network) still surfaces in the calls below.
        file_system_client = service_client.get_file_system_client(file_system=container_name)

    dir_client = file_system_client.get_directory_client(directory_name)
    dir_client.create_directory()
    file_client = dir_client.create_file(storage_file)
    file_client.append_data(payload, 0, len(payload))
    file_client.flush_data(len(payload))

    return df


In [14]:
# Upload the full coin listing using the configured container and directory,
# and keep the DataFrame the function returns.
dataframe = write_to_storage(
    storage_account_name=storage_account_name,
    storage_account_key=storage_account_key,
    container_name=container_name,
    directory_name=directory_name,
    dataset=top_coins_csv,
    file_name="top100coins.csv",
)

### -----------------------------------------------------------------------------------------------------------------------------------------

In [15]:
#keep only the rows that have a name
dataframe = dataframe.loc[dataframe.name.notna()]
#columns carried forward into the refined dataset
desired_columns = ["id", "name", "symbol", "rank","File_Name", "injest_datetime"]
#narrow the dataset to those columns
refined_dataframe = dataframe.loc[:, desired_columns]
#serialise to CSV text (no index column) so it can be written to storage
refined_dataframe_csv = refined_dataframe.to_csv(index=False)

In [16]:
#Upload the refined CSV to the "refined" directory and keep the returned dataframe
refined_coin_list = write_to_storage(
    storage_account_name=storage_account_name,
    storage_account_key=storage_account_key,
    container_name="coinlist",
    directory_name="refined",
    dataset=refined_dataframe_csv,
    file_name="top100coins.csv",
)