In [1]:
pip install requests beautifulsoup4

Note: you may need to restart the kernel to use updated packages.


# Importing libraries

In [2]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Step 1: Scraping Data
# Step 9: OOP Concepts

In [37]:
def scrape_crypto_prices(crypto_symbol):
    # Construct URL for crypto price data
    url = "https://data.messari.io/api/v1/assets/{}/metrics/price/time-series".format(crypto_symbol)

    try:
        # Make a request to the API
        response = requests.get(url, verify=True)
        response.raise_for_status()

        # Extract price data from the response
        data = response.json()['data']['values']
        
        # Create a DataFrame from the data
        df = pd.DataFrame(data)
        
        return df
    except requests.exceptions.RequestException as e:
        # Handle request exceptions
        print("Error fetching data for {}: {}".format(crypto_symbol, e))
        return None

# List of cryptocurrencies to scrape
cryptos = ['bitcoin', 'ethereum', 'xrp']

# Dictionary to store crypto data
crypto_data = {crypto: scrape_crypto_prices(crypto) for crypto in cryptos}

# Process and save data for each cryptocurrency
for crypto, df in crypto_data.items():
    csv_filename = "{}_data.csv".format(crypto)
    print("\n{} Data:".format(crypto.capitalize()))
    if df is not None:
        # Print the first few rows of the DataFrame
        print(df.head())

        # Rename columns for clarity
        df.columns = ['time', 'open_price', 'close_price', 'high', 'low', 'volume']

        # Save data to a CSV file
        df.to_csv(csv_filename, index=False)
        print("Data saved to {}".format(csv_filename))
    else:
        print("Failed to fetch data for {}".format(crypto))


Bitcoin Data:
               0             1             2             3             4  \
0  1700265600000  36619.768859  36632.329998  36401.793963  36406.884027   
1  1700269200000  36411.495222  36474.484392  36398.150116  36420.878256   
2  1700272800000  36424.550083  36462.474380  36416.855645  36424.394585   
3  1700276400000  36426.568890  36440.972455  36367.420248  36390.468414   
4  1700280000000  36388.013729  36441.350811  36373.474967  36378.598216   

              5  
0  2.510288e+08  
1  1.496349e+08  
2  1.103550e+08  
3  1.327806e+08  
4  1.101722e+08  
Data saved to bitcoin_data.csv

Ethereum Data:
               0            1            2            3            4  \
0  1700265600000  1961.561909  1961.720964  1953.226911  1954.740351   
1  1700269200000  1955.092290  1958.164162  1950.570826  1950.570826   
2  1700272800000  1950.648895  1956.174492  1950.421272  1950.449739   
3  1700276400000  1950.618693  1950.695075  1940.373139  1941.621433   
4  1700280000

# Step 2: Data Cleaning and Structuring
# Step 7:Save Results

In [39]:
class CryptoData:
    def __init__(self, crypto_symbol):
        # Initialize CryptoData object for a specific cryptocurrency
        self.crypto_symbol = crypto_symbol
        self.data = self.scrape_data()
        
        # If data is successfully fetched, clean it up
        if self.data is not None:
            self.clean_data()

    def scrape_data(self):
        # Construct URL for fetching crypto data
        url = "https://data.messari.io/api/v1/assets/{}/metrics/price/time-series".format(self.crypto_symbol)

        try:
            # Make a request to the API
            response = requests.get(url, verify=True)
            response.raise_for_status()

            # Extract price data from the response and convert it into a DataFrame
            data = response.json()['data']['values']
            df = pd.DataFrame(data)
            return df
        except requests.exceptions.RequestException as e:
            # Handle errors during data retrieval
            print("Error fetching data for {}: {}".format(self.crypto_symbol, e))
            return None

    def clean_data(self):
        # Rename columns for clarity
        self.data.columns = ['time', 'opening_price', 'closing_price', 'high', 'low', 'volume']

    def save_to_csv(self):
        # Save data to CSV file if data is available
        if self.data is not None:
            csv_filename = "{}_data.csv".format(self.crypto_symbol)
            self.data.to_csv(csv_filename, index=False)
            print("Data for {} saved to {}".format(self.crypto_symbol, csv_filename))
        else:
            # Print a message if no data is available
            print("Failed to save data for {}".format(self.crypto_symbol))

# List of cryptocurrencies to fetch data for
cryptos = ['bitcoin', 'ethereum', 'xrp']

# Create CryptoData objects for each cryptocurrency
crypto_data_objects = {crypto: CryptoData(crypto) for crypto in cryptos}

# Save data to CSV for each cryptocurrency
for crypto, crypto_data_object in crypto_data_objects.items():
    crypto_data_object.save_to_csv()

Data for bitcoin saved to bitcoin_data.csv
Data for ethereum saved to ethereum_data.csv
Data for xrp saved to xrp_data.csv


# step 3: Handling Missing Values

In [40]:
class CryptoData:
    def __init__(self, crypto_symbol):
        # Initialize CryptoData object for a specific cryptocurrency
        self.crypto_symbol = crypto_symbol
        # Fetch and store the data when the object is created
        self.data = self.scrape_data()
        # If data is successfully fetched, clean it up
        if self.data is not None:
            self.clean_data()

    def scrape_data(self):
        # API endpoint for fetching crypto data
        url = f"https://data.messari.io/api/v1/assets/{self.crypto_symbol}/metrics/price/time-series"

        try:
            # Make a request to the API
            response = requests.get(url, verify=True)
            response.raise_for_status()

            # Extract and convert data into a DataFrame
            data = response.json()['data']['values']
            df = pd.DataFrame(data)
            return df
        except requests.exceptions.RequestException as e:
            # Handle errors during data retrieval
            print(f"Error fetching data for {self.crypto_symbol}: {e}")
            return None

    def clean_data(self):
        # Rename columns for clarity
        self.data.columns = ['time', 'opening_price', 'closing_price', 'high', 'low', 'volume']
        # Drop rows with missing values
        self.data = self.data.dropna()
        
    def save_to_csv(self):
        # Save data to CSV file if data is available
        if self.data is not None:
            csv_filename = f"{self.crypto_symbol}_data.csv"
            self.data.to_csv(csv_filename, index=False)
            print(f"Data for {self.crypto_symbol} saved to {csv_filename}")
        else:
            # Print a message if no data is available
            print(f"Failed to save data for {self.crypto_symbol}")

# List of cryptocurrencies to fetch data for
cryptos = ['bitcoin', 'ethereum', 'xrp']

# Create CryptoData objects for each cryptocurrency
crypto_data_objects = {crypto: CryptoData(crypto) for crypto in cryptos}

# Save data to CSV for each cryptocurrency
for crypto, crypto_data_object in crypto_data_objects.items():
    crypto_data_object.save_to_csv()


Data for bitcoin saved to bitcoin_data.csv
Data for ethereum saved to ethereum_data.csv
Data for xrp saved to xrp_data.csv


# Step 4:Data Transformation

In [47]:
class CryptoData:
    def __init__(self, crypto_symbol):
        # Initialize CryptoData object for a specific cryptocurrency
        self.crypto_symbol = crypto_symbol
        # Fetch and process the data when the object is created
        self.data = self.scrape_data()
        if self.data is not None:
            self.clean_data()
            self.handle_missing_values()
            self.calculate_log_returns()
            self.save_to_csv()
            self.display_data()

    def scrape_data(self):
        # API endpoint for fetching crypto data
        url = "https://data.messari.io/api/v1/assets/{}/metrics/price/time-series".format(self.crypto_symbol)

        try:
            # Make a request to the API
            response = requests.get(url, verify=True)
            response.raise_for_status()  # Raise an HTTPError for bad responses

            # Extract and convert data into a DataFrame
            data = response.json()['data']['values']
            df = pd.DataFrame(data)
            return df
        except requests.exceptions.RequestException as e:
            # Handle errors during data retrieval
            print("Error fetching data for {}: {}".format(self.crypto_symbol, e))
            return None

    def clean_data(self):
        # Rename columns for clarity
        self.data.columns = ['time', 'opening_price', 'closing_price', 'high', 'low', 'volume']
        # Drop rows with missing values
        self.data = self.data.dropna()
        
    def handle_missing_values(self):
        # Handle missing values by filling with column means
        for column in self.data.columns:
            self.data[column] = self.data[column].fillna(self.data[column].mean())

    def calculate_log_returns(self):
        # Calculate logarithmic returns for price columns
        price_columns = ['opening_price', 'closing_price', 'high', 'low']
        for col in price_columns:
            self.data[f'{col}_log_return'] = np.log(self.data[col] / self.data[col].shift(1))

    def save_to_csv(self):
        # Save data to CSV file if data is available
        if self.data is not None:
            csv_filename = "{}_data.csv".format(self.crypto_symbol)
            self.data.to_csv(csv_filename, index=False)
            print("\nData for {} saved to {}".format(self.crypto_symbol, csv_filename))
        else:
            # Print a message if no data is available
            print("Failed to save data for {}".format(self.crypto_symbol))

    def display_data(self):
        # Display a subset of the processed data
        print("\nData for {} After Handling Missing Values and Calculating Log Returns:".format(self.crypto_symbol))
        print(self.data.head(10))

# List of cryptocurrencies to fetch and process data for
cryptos = ['bitcoin', 'ethereum', 'xrp']

# Create CryptoData objects for each cryptocurrency
crypto_data_objects = {crypto: CryptoData(crypto) for crypto in cryptos}



Data for bitcoin saved to bitcoin_data.csv

Data for bitcoin After Handling Missing Values and Calculating Log Returns:
            time  opening_price  closing_price          high           low  \
0  1700272800000   36424.550083   36462.474380  36416.855645  36424.394585   
1  1700276400000   36426.568890   36440.972455  36367.420248  36390.468414   
2  1700280000000   36388.013729   36441.350811  36373.474967  36378.598216   
3  1700283600000   36396.231319   36398.665754  36321.412924  36347.493327   
4  1700287200000   36347.507826   36378.633448  36272.382649  36272.382649   
5  1700290800000   36272.078494   36439.426592  36223.845714  36416.066588   
6  1700294400000   36423.589899   36489.184728  36401.815413  36457.307660   
7  1700298000000   36460.111751   36460.111751  36404.129823  36411.925599   
8  1700301600000   36403.696154   36488.533068  36403.696154  36488.533068   
9  1700305200000   36484.051418   36534.944419  36415.047597  36416.904304   

         volume  ope

# Step 5: Analysis

In [48]:
class CryptoData:
    def __init__(self, crypto_symbol):
        self.crypto_symbol = crypto_symbol
        self.data = self.scrape_data()

        if self.data is not None:
            self.clean_data()
            self.handle_missing_values()
            self.calculate_log_returns()
            self.save_to_csv()
            self.display_data()
            self.perform_analysis()

    def scrape_data(self):
        # Fetch crypto data from the API
        url = "https://data.messari.io/api/v1/assets/{}/metrics/price/time-series".format(self.crypto_symbol)

        try:
            response = requests.get(url, verify=True)
            response.raise_for_status()
            data = response.json()['data']['values']
            return pd.DataFrame(data)
        except requests.exceptions.RequestException as e:
            # Handle errors during data retrieval
            print("Error fetching data for {}: {}".format(self.crypto_symbol, e))
            return None

    def clean_data(self):
        # Rename columns for clarity
        self.data.columns = ['time', 'opening_price', 'closing_price', 'high', 'low', 'volume']
        # Drop rows with missing values
        self.data = self.data.dropna()
        
    def handle_missing_values(self):
        # Handle missing values by filling with column means
        self.data.fillna(self.data.mean(), inplace=True)

    def calculate_log_returns(self):
        # Calculate logarithmic returns for price columns
        price_columns = ['opening_price', 'closing_price', 'high', 'low']
        for col in price_columns:
            self.data[col + '_log_return'] = np.log(self.data[col] / self.data[col].shift(1))

    def save_to_csv(self):
        # Save data to CSV file if available
        if self.data is not None:
            csv_filename = "{}_data.csv".format(self.crypto_symbol)
            self.data.to_csv(csv_filename, index=False)
            print("\nData for {} saved to {}".format(self.crypto_symbol, csv_filename))
        else:
            # Print a message if no data is available
            print("Failed to save data for {}".format(self.crypto_symbol))

    def display_data(self):
        # Display a subset of the processed data
        print("\nData for {} After Handling Missing Values and Calculating Log Returns:".format(self.crypto_symbol))
        print(self.data.head(10))

    def perform_analysis(self):
        # Perform analysis on logarithmic returns
        print("\nAnalysis for {}:".format(self.crypto_symbol))

        # Calculate mean, median, and standard deviation for each log return column
        for col in ['opening_price_log_return', 'closing_price_log_return', 'high_log_return', 'low_log_return']:
            stats = self.data[col].describe()

            # Display analysis results
            print("\nAnalysis for {}:".format(col))
            print(stats)

            # Find date with the highest return for each log return column
            max_return_date = self.data.loc[self.data[col].idxmax(), 'time']
            max_return_value = self.data[col].max()

            # Display date with the highest return
            print("\nDate with the Highest Return for {}:".format(col))
            print("Date: {}".format(max_return_date))
            print("Highest Return Value: {}".format(max_return_value))

# List of cryptocurrencies to fetch, process, and analyze data for
cryptos = ['bitcoin', 'ethereum', 'xrp']

# Create CryptoData objects for each cryptocurrency
crypto_data_objects = {crypto: CryptoData(crypto) for crypto in cryptos}



Data for bitcoin saved to bitcoin_data.csv

Data for bitcoin After Handling Missing Values and Calculating Log Returns:
            time  opening_price  closing_price          high           low  \
0  1700272800000   36424.550083   36462.474380  36416.855645  36424.394585   
1  1700276400000   36426.568890   36440.972455  36367.420248  36390.468414   
2  1700280000000   36388.013729   36441.350811  36373.474967  36378.598216   
3  1700283600000   36396.231319   36398.665754  36321.412924  36347.493327   
4  1700287200000   36347.507826   36378.633448  36272.382649  36272.382649   
5  1700290800000   36272.078494   36439.426592  36223.845714  36416.066588   
6  1700294400000   36423.589899   36489.184728  36401.815413  36457.307660   
7  1700298000000   36460.111751   36460.111751  36404.129823  36411.925599   
8  1700301600000   36403.696154   36488.533068  36403.696154  36488.533068   
9  1700305200000   36484.051418   36534.944419  36415.047597  36416.904304   

         volume  ope

# Step 6: Visualization

In [78]:
class CryptoData:
    def __init__(self, crypto_symbol):
        self.crypto_symbol = crypto_symbol
        self.data = self.scrape_data()
        if self.data is not None:
            self.clean_data()
            self.handle_missing_values()
            self.calculate_log_returns()
            self.save_to_csv()
            self.display_data()
            self.perform_analysis()
            self.create_visualizations()

    def scrape_data(self):
        # API endpoint for fetching crypto data
        url = "https://data.messari.io/api/v1/assets/{}/metrics/price/time-series".format(self.crypto_symbol)

        try:
            # Make a request to the API
            response = requests.get(url, verify=True)
            response.raise_for_status()  # Raise an HTTPError for bad responses

            # Extract and convert data into a DataFrame
            data = response.json()['data']['values']
            df = pd.DataFrame(data)
            return df
        except requests.exceptions.RequestException as e:
            # Handle errors during data retrieval
            print("Error fetching data for {}: {}".format(self.crypto_symbol, e))
            return None

    def clean_data(self):
        # Rename columns for clarity
        self.data.columns = ['time', 'opening_price', 'closing_price', 'high', 'low', 'volume']
        # Drop rows with missing values
        self.data = self.data.dropna()
        
    def handle_missing_values(self):
        # Handle missing values by filling with column means
        for column in self.data.columns:
            self.data[column] = self.data[column].fillna(self.data[column].mean())

    def calculate_log_returns(self):
        # Calculate logarithmic returns for price columns
        price_columns = ['opening_price', 'closing_price', 'high', 'low']
        for col in price_columns:
            self.data[col + '_log_return'] = np.log(self.data[col] / self.data[col].shift(1))

    def save_to_csv(self):
        # Save data to CSV file if data is available
        if self.data is not None:
            csv_filename = "{}_data.csv".format(self.crypto_symbol)
            self.data.to_csv(csv_filename, index=False)
            print("\nData for {} saved to {}".format(self.crypto_symbol, csv_filename))
        else:
            # Print a message if no data is available
            print("Failed to save data for {}".format(self.crypto_symbol))

    def display_data(self):
        # Display a subset of the processed data
        print("\nData for {} After Handling Missing Values and Calculating Log Returns:".format(self.crypto_symbol))
        print(self.data.head())

    def perform_analysis(self):
        # Perform analysis on logarithmic returns
        print("\nAnalysis for {}:".format(self.crypto_symbol))

        # Calculate mean, median, and standard deviation for each log return column
        for col in ['opening_price_log_return', 'closing_price_log_return', 'high_log_return', 'low_log_return']:
            mean_return = self.data[col].mean()
            median_return = self.data[col].median()
            std_dev_return = self.data[col].std()

            # Display analysis results
            print("\nAnalysis for {}:".format(col))
            print("Mean: ", mean_return)
            print("Median: ", median_return)
            print("Standard Deviation: ", std_dev_return)

            # Find date with the highest return for each log return column
            max_return_date = self.data.loc[self.data[col].idxmax(), 'time']
            max_return_value = self.data[col].max()

            # Display date with the highest return
            print("\nDate with the Highest Return for {}:".format(col))
            print("Date: {}".format(max_return_date))
            print("Highest Return Value: {}".format(max_return_value))

    def create_visualizations(self):
        # Create and save visualizations
        self.plot_historical_prices()
        self.plot_mean_returns()
    #line chart
    def plot_historical_prices(self):
        # Plot historical closing prices and save the plot as an image
        plt.figure(figsize=(9, 6))
        plt.plot(self.data['time'], self.data['closing_price'], label='Closing Price', marker='o')
        plt.title("Historical Prices of {}".format(self.crypto_symbol))
        plt.xlabel("Date")
        plt.ylabel("Closing Price")
        plt.grid(True)
        plt.savefig("{}_historical_prices.png".format(self.crypto_symbol))
        plt.show()
    #bar chart
    def plot_mean_returns(self):
        # Plot mean returns for each log return column and save the plot as an image
        price_columns = ['opening_price', 'closing_price', 'high', 'low']
        mean_returns = self.data[[col for col in price_columns]].mean()

        plt.figure(figsize=(8, 5))
        mean_returns.plot(kind='bar', color=['yellow', 'indigo', 'beige', 'pink'])
        plt.title("Mean Returns of {}".format(self.crypto_symbol))
        plt.xlabel("Price Type")
        plt.ylabel("Mean Return")
        plt.savefig("{}_mean_returns.png".format(self.crypto_symbol))
        plt.show()

# List of cryptocurrencies to fetch, process, and analyze data for
cryptos = ['bitcoin', 'ethereum', 'xrp']

# Create CryptoData objects for each cryptocurrency
crypto_data_objects = {crypto: CryptoData(crypto) for crypto in cryptos}

SyntaxError: positional argument follows keyword argument (2019283544.py, line 97)

# Step 8:
## Summarize Findings

### Historical Prices:

Look at the line charts for how the prices of cryptocurrencies changed over time. Find any repeating patterns or big changes in the closing prices.

### Logarithmic Returns:

Study the average, middle, and spread of logarithmic returns for each cryptocurrency. Use these numbers to understand how much the prices go up and down and get an overall view of how well each cryptocurrency is doing.

### Date with Highest Return:

Find the dates when the prices of each cryptocurrency were the highest. Talk about why those dates might be special and if there were any important events that caused the prices to go up so much.

### Correlation Matrix:

Check how the returns of Bitcoin, Ethereum, and XRP are related using a correlation matrix. If the numbers are positive, it means they usually move in the same way, and if negative, they move in opposite directions.

### Visualizations:

Look at the line chart to see trends and important moments in the price history. Use a bar chart to compare the average returns and figure out which type of price change (opening, closing, high, low) usually gives more money.

### Overall Market Trends:

Sum up what you see in the data about how the market is generally moving. Talk about any times when things were very different from the usual.

### Influencing Factors:

Think about things outside of just the prices that might be affecting them. This could be rules changing, new technology, big economic events, or how people feel about the market.

### Limitations and Considerations:

Point out anything that might make the data or analysis not perfect. This could be missing information, things you had to guess, or stuff you didn't think about.