# Data retrieval from localbitcoins API

In [1]:
import requests
import pandas as pd
import numpy as np
import datetime as dt
from datetime import datetime, timezone

In [2]:
def make_request(url_base, url, timeout=30):
    """Send an HTTP GET to ``url_base + url`` and return the decoded JSON body.

    Parameters
    ----------
    url_base : str
        First part of the address. It is concatenated with ``url`` verbatim,
        so any separating slash must already be present in one of the two.
    url : str
        Second part of the address (endpoint path).
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Defaults to 30 so a stalled connection cannot block the notebook
        forever.

    Returns
    -------
    object
        Whatever ``Response.json()`` decodes from the response body.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If no answer arrives within ``timeout``.
    """
    response = requests.get(url_base + url, timeout=timeout)
    # Fail here on an error status instead of handing an error payload to the
    # parsing cells, where it would surface as a confusing KeyError.
    response.raise_for_status()
    return response.json()

def json_to_pandas(file_json:dict):
    """Turn a decoded JSON mapping into a DataFrame.

    The top-level keys of ``file_json`` become the columns; when the values
    are themselves mappings, their keys become the row index.
    """
    frame = pd.DataFrame.from_dict(file_json, orient="columns")
    return frame


### Reference exchange rate per currency, used as the base for pct_change math:
def exchange_reference(df, date_reference):
    """Return the mean ``implicit_exchange`` of each currency on its reference day.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``currency_code``, ``date`` (datetime-like,
        it is accessed through ``.dt``) and ``implicit_exchange``.
    date_reference : str or datetime-like
        Reference day applied to every currency except "VED". Anything
        ``pd.Timestamp`` accepts; only the calendar date is used.

    Returns
    -------
    dict
        ``{currency_code: mean implicit_exchange on the reference day}``.
        The value is NaN for a currency with no rows on its reference day.

    Notes
    -----
    The percentage variation itself is not computed here; it is meant to be
    derived by the caller as
    ``((implicit_exchange / reference) - 1) * 100``.
    """
    default_day = pd.Timestamp(date_reference).date()
    # NOTE(review): "VED" is pinned to 2021-10-10 regardless of the argument;
    # the reason for that date is not recorded in this notebook.
    ved_day = pd.Timestamp("2021-10-10").date()

    memory = {}

    for currency_code in df["currency_code"].unique():

        # Choose the day per currency WITHOUT overwriting the argument:
        # reassigning `date_reference` here would leak the VED date into every
        # currency processed after VED.
        reference_day = ved_day if currency_code == "VED" else default_day

        ## Select currency for calculations:
        df_exchange = df[df["currency_code"] == currency_code]

        ## Rows of that currency recorded on the reference day. Both sides are
        ## plain `datetime.date` objects; comparing dates with a Timestamp is
        ## deprecated / never equal in recent pandas.
        on_reference_day = df_exchange["date"].dt.date == reference_day

        ### Average exchange rate for the day of reference:
        memory[currency_code] = df_exchange.loc[on_reference_day, "implicit_exchange"].mean()

    return memory

## Currencies

In [3]:
# Root address of the API; endpoint paths are appended to it verbatim by make_request.
url_base = 'https://localbitcoins.com/'
# Currencies: fetch the currency listing and load its 'data' member into a DataFrame.
url = "api/currencies"
currency_code = make_request(url_base,url)
df_currency_code = json_to_pandas(currency_code['data'])
# Optional inspection, left disabled:
# df_currency_code.info()
# df_currency_code.head()


In [4]:
## Extract the display name and altcoin flag of each currency.
# At this point the 'currencies' column holds one dict per currency code with
# (at least) the keys "name" and "altcoin". Address the column by label:
# positional lookups such as row[0] on a label-indexed row are deprecated.
currency_details = df_currency_code["currencies"]

## Reorganize on df: 'currencies' now holds the name, 'altcoin' the flag, and
## the currency code moves from the index into a regular 'index' column.
df_currency_code = df_currency_code.assign(
    currencies=[details["name"] for details in currency_details],
    altcoin=[details["altcoin"] for details in currency_details],
).reset_index()

df_currency_code.head()

Unnamed: 0,index,currencies,currency_count,altcoin
0,ADA,Cardano (ADA),183,True
1,AED,United Arab Emirates Dirham (AED),183,False
2,AFN,Afghan Afghani (AFN),183,False
3,ALL,Albanian Lek (ALL),183,False
4,AMD,Armenian Dram (AMD),183,False


### Average price of BTC by currency (in local currency)

In [5]:
# Request the all-currencies BTC ticker from the same base address.
url = "bitcoinaverage/ticker-all-currencies/"
price_btc = make_request(url_base,url)
# POSIX timestamp (seconds, float) taken from the local clock right after the
# request returns; later cells use it as the retrieval time of this snapshot.
time_stamp = datetime.now().timestamp()

In [6]:
# One row per currency code: load the ticker payload, transpose it so the
# currency codes become the index, stamp every row with the retrieval time,
# then move the codes into a regular 'index' column.
df_price_btc = (
    json_to_pandas(price_btc)
    .T
    .assign(time_stamp=time_stamp)
    .reset_index()
)
print(df_price_btc.info())
df_price_btc.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77 entries, 0 to 76
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   index       77 non-null     object 
 1   volume_btc  77 non-null     object 
 2   rates       77 non-null     object 
 3   avg_1h      39 non-null     object 
 4   avg_6h      59 non-null     object 
 5   avg_12h     69 non-null     object 
 6   avg_24h     77 non-null     object 
 7   time_stamp  77 non-null     float64
dtypes: float64(1), object(7)
memory usage: 4.9+ KB
None


Unnamed: 0,index,volume_btc,rates,avg_1h,avg_6h,avg_12h,avg_24h,time_stamp
0,USD,3.08468295,{'last': '59100.02'},65695.55,62834.08,64093.92,61628.66,1637249000.0
1,EUR,2.64808807,{'last': '62539.09'},56238.21,54131.46,54077.42,54298.92,1637249000.0
2,GBP,2.51789859,{'last': '47568.31'},43999.35,43787.72,43798.63,44145.11,1637249000.0
3,COP,3.84287777,{'last': '229425358.88'},228927739.99,229465024.21,230463734.47,230393106.09,1637249000.0
4,VED,3.7408461,{'last': '288249.23'},282964.9,281729.56,281798.56,2349665524.16,1637249000.0


## Merge and clean dataframe

In [7]:
# Column layout of the cleaned market table (before 'index' is renamed).
market_columns = ['time_stamp','date','index','currencies','volume_btc','avg_24h','avg_12h','avg_6h','avg_1h','altcoin']

df_market = (
    df_price_btc
    # Right join on the shared column(s) - here the currency code in 'index' -
    # so every known currency gets a row.
    .merge(df_currency_code, how='right')
    # Currencies without ticker data have no time_stamp; discard them.
    .dropna(subset=['time_stamp'])
    # Timezone-aware (UTC) datetime derived from the retrieval time_stamp.
    .assign(date=datetime.fromtimestamp(time_stamp, tz=timezone.utc))
    .loc[:, market_columns]
    .rename(columns={'index': 'currency_code'})
)
print(df_market.info())
df_market.head(10)


### I should include some data formatting so info is stored in the right format

<class 'pandas.core.frame.DataFrame'>
Int64Index: 77 entries, 0 to 181
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype              
---  ------         --------------  -----              
 0   time_stamp     77 non-null     float64            
 1   date           77 non-null     datetime64[ns, UTC]
 2   currency_code  77 non-null     object             
 3   currencies     77 non-null     object             
 4   volume_btc     77 non-null     object             
 5   avg_24h        77 non-null     object             
 6   avg_12h        69 non-null     object             
 7   avg_6h         59 non-null     object             
 8   avg_1h         39 non-null     object             
 9   altcoin        77 non-null     bool               
dtypes: bool(1), datetime64[ns, UTC](1), float64(1), object(7)
memory usage: 6.1+ KB
None


Unnamed: 0,time_stamp,date,currency_code,currencies,volume_btc,avg_24h,avg_12h,avg_6h,avg_1h,altcoin
0,1637249000.0,2021-11-18 15:27:37.595203+00:00,ADA,Cardano (ADA),0.00302647,33041.79,33041.79,33041.79,,True
1,1637249000.0,2021-11-18 15:27:37.595203+00:00,AED,United Arab Emirates Dirham (AED),0.10320225,213658.13,213658.13,213658.13,213168.54,False
6,1637249000.0,2021-11-18 15:27:37.595203+00:00,AOA,Angolan Kwanza (AOA),0.00346789,34603173.69,34603173.69,,,False
7,1637249000.0,2021-11-18 15:27:37.595203+00:00,ARS,Argentine Peso (ARS),0.39053995,12456013.68,12577517.43,12571577.39,12252043.78,False
8,1637249000.0,2021-11-18 15:27:37.595203+00:00,AUD,Australian Dollar (AUD),0.31431455,86741.26,86465.02,87024.08,,False
14,1637249000.0,2021-11-18 15:27:37.595203+00:00,BDT,Bangladeshi Taka (BDT),0.01285536,4985080.16,4985080.16,5735495.32,,False
20,1637249000.0,2021-11-18 15:27:37.595203+00:00,BOB,Bolivian Boliviano (BOB),0.07107757,438338.28,419772.21,419772.21,413507.93,False
21,1637249000.0,2021-11-18 15:27:37.595203+00:00,BRL,Brazilian Real (BRL),0.38470359,337141.67,341507.58,339961.69,339802.04,False
24,1637249000.0,2021-11-18 15:27:37.595203+00:00,BWP,Botswanan Pula (BWP),0.00148266,809356.16,,,,False
25,1637249000.0,2021-11-18 15:27:37.595203+00:00,BYN,Belarusian Ruble (BYN),0.25521029,152689.18,152443.8,153491.32,153492.79,False


In [8]:
## Copy the USD row's avg_24h (USD/BTC) onto every row for immediate comparison:

usd_rows = df_market['currency_code'] == "USD"
df_market["avg_24h_usd"] = df_market.loc[usd_rows, 'avg_24h'].iloc[0]

In [9]:
### Formatting data types:

# Columns that hold numbers but arrived with object dtype; convert them all
# to numeric in a single pass.
numeric_columns = ['volume_btc','avg_24h','avg_12h','avg_6h','avg_1h','avg_24h_usd']
df_market[numeric_columns] = df_market[numeric_columns].apply(pd.to_numeric)

df_market.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 77 entries, 0 to 181
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype              
---  ------         --------------  -----              
 0   time_stamp     77 non-null     float64            
 1   date           77 non-null     datetime64[ns, UTC]
 2   currency_code  77 non-null     object             
 3   currencies     77 non-null     object             
 4   volume_btc     77 non-null     float64            
 5   avg_24h        77 non-null     float64            
 6   avg_12h        69 non-null     float64            
 7   avg_6h         59 non-null     float64            
 8   avg_1h         39 non-null     float64            
 9   altcoin        77 non-null     bool               
 10  avg_24h_usd    77 non-null     float64            
dtypes: bool(1), datetime64[ns, UTC](1), float64(7), object(2)
memory usage: 6.7+ KB


In [10]:
## Implicit exchange rate: BTC price in local currency over BTC price in USD.
df_market['implicit_exchange'] = df_market['avg_24h'] / df_market['avg_24h_usd']

In [11]:
# Keep only the rows that carry a time_stamp
df_market = df_market.dropna(subset=['time_stamp'])

In [12]:
# Spot-check the first rows, including the avg_24h_usd and implicit_exchange columns.
df_market.head(2)

Unnamed: 0,time_stamp,date,currency_code,currencies,volume_btc,avg_24h,avg_12h,avg_6h,avg_1h,altcoin,avg_24h_usd,implicit_exchange
0,1637249000.0,2021-11-18 15:27:37.595203+00:00,ADA,Cardano (ADA),0.003026,33041.79,33041.79,33041.79,,True,61628.66,0.536143
1,1637249000.0,2021-11-18 15:27:37.595203+00:00,AED,United Arab Emirates Dirham (AED),0.103202,213658.13,213658.13,213658.13,213168.54,False,61628.66,3.466863


## Save information in .csv and sql database

In [13]:
# Saving csv: one snapshot file per run, named after the retrieval time_stamp
csv_path = f'./data/csv/{time_stamp}.csv'
df_market.to_csv(csv_path, index=False)

In [14]:
# Append information on sqlite database

# NOTE(review): of the sqlalchemy names imported below, only create_engine is
# used by the code visible in this notebook.
from sqlalchemy import Column, Integer, Float, String, DateTime,Boolean, ForeignKey
from sqlalchemy.orm import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# SQLite file addressed relative to the current working directory.
engine = create_engine('sqlite:///data/CurrenciesDataBase.db')
print(type(engine))
# Add this run's rows to the 'currencies_vs_btc' table: rows already stored are
# kept (if_exists="append") and the DataFrame index is not written.
df_market.to_sql(name="currencies_vs_btc", con=engine, if_exists="append", index=False)

<class 'sqlalchemy.engine.base.Engine'>


#### Calculation of pct_change taking 2021-10-03 as the date of reference (VED uses 2021-10-10)

In [15]:
# Read the whole 'currencies_vs_btc' table back, i.e. every snapshot stored so
# far, not only the rows of this run.
path_to_currencies = "sqlite:///data/CurrenciesDataBase.db"
df_market = pd.read_sql('currencies_vs_btc', path_to_currencies)

### Open question: the pct calculations are not saved properly on the dataset unless a previously
# saved one is loaded first. Why?
# The current pipeline works around this by saving the calculated data to a new dataset, but the
# calculations are redone over the full dataset every time the code runs.
# This practice is not scalable.

# Alternative: the pct could be calculated in the web interface for a date selected by the user,
# but this implies more processing time.

In [16]:
### Average the numeric columns per (date, currency_code).
# numeric_only=True is required: the table also stores the text column
# 'currencies', and recent pandas raises instead of silently dropping it.
df = (
    df_market
    .groupby(by=["date", "currency_code"])
    .mean(numeric_only=True)
    .reset_index()
)

### Dataset with implicit exchange of reference by currency:
date_reference = "2021-10-03"
reference_by_currency = exchange_reference(df, date_reference)
exchange = pd.DataFrame.from_dict(reference_by_currency, orient="index")

### Percentage variation against each currency's reference rate.
# Vectorized: look up the reference rate of every row once, instead of a
# row-wise apply that returns a one-element Series per row.
reference_rate = df["currency_code"].map(reference_by_currency)
df['pct'] = (df['implicit_exchange'] / reference_rate - 1) * 100

### Drop Null values (currencies without data on their reference day):
df = df.dropna(subset=['pct'])

  result = libops.scalar_compare(x.ravel(), y, op)


In [17]:
# Check dtypes and non-null counts after the pct calculation.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7327 entries, 0 to 7557
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   date               7327 non-null   datetime64[ns]
 1   currency_code      7327 non-null   object        
 2   time_stamp         7327 non-null   float64       
 3   volume_btc         7327 non-null   float64       
 4   avg_24h            7327 non-null   float64       
 5   avg_12h            6708 non-null   float64       
 6   avg_6h             5974 non-null   float64       
 7   avg_1h             3575 non-null   float64       
 8   altcoin            7327 non-null   float64       
 9   avg_24h_usd        7327 non-null   float64       
 10  implicit_exchange  7327 non-null   float64       
 11  pct                7327 non-null   float64       
dtypes: datetime64[ns](1), float64(10), object(1)
memory usage: 744.1+ KB


In [18]:
### Saving on DB Version 2

engine = create_engine('sqlite:///data/CurrenciesDataBase_V2')
print(type(engine))
# `df` is rebuilt from the complete 'currencies_vs_btc' history on every run,
# so appending it would store every historical row again each time the
# notebook executes. Replace the derived table with the fresh result instead.
df.to_sql(name="currencies_vs_btc", con=engine, if_exists="replace", index=False)

<class 'sqlalchemy.engine.base.Engine'>


In [21]:
# Execution footer: report when the notebook last ran through to this cell.
now = dt.datetime.now()
print("Last Execution: ", str(now), 'Completed', sep="\n")

Last Execution: 
2021-11-18 16:28:22.467134
Completed
