In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found.
input_root = '/kaggle/input'
for dirname, _, filenames in os.walk(input_root):
    for filename in filenames:
        listed_path = os.path.join(dirname, filename)
        print(listed_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
!pip install nfts

In [4]:
import os
import sqlite3

import matplotlib.pyplot as plt
import nfts.dataset
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from scipy.special import zeta

In [6]:
# os.listdir returns entries in arbitrary order; sort them so the listing
# displayed by this cell is the same on every run.
sorted(os.listdir("/kaggle/input/ethereum-nfts"))

In [7]:
# Location of the SQLite file that is handed to nfts.dataset.FromSQLite below.
path = "/kaggle/input/ethereum-nfts/nfts.sqlite"

In [9]:
# Dataset accessor built from the SQLite file; used below via data.load_dataframe(...).
data = nfts.dataset.FromSQLite(path)
# NOTE(review): presumably prints a description of the dataset's contents —
# confirm against the nfts package documentation.
nfts.dataset.explain()

In [11]:
# Load the "current_owners" data. Later cells group it by its "nft_address" and
# "owner" columns and treat every row as one token when counting.
owners = data.load_dataframe("current_owners")
# Preview the first rows.
owners.head()

In [12]:
# Total number of rows loaded into `owners`.
len(owners)

In [13]:
# Number of tokens held by each owner address, largest holders first.
# Built as a single chained expression instead of sorting with inplace=True,
# so `highest` is never observable in a half-built (unsorted) state and the
# cell does not mutate a frame after creating it.
highest = (
    owners.groupby(["owner"], as_index=False)
    .size()
    .rename(columns={"size": "num_tokens"})
    .sort_values("num_tokens", ascending=False)
)

In [14]:
# Number of distinct owner addresses, followed by the ten largest holders.
print(highest.shape[0])
highest.iloc[:10]

In [19]:
# Histogram of per-address token counts, with a log-scaled count axis.
fig, ax = plt.subplots(figsize=(8, 6))
ax.set_xlabel("Number of tokens owned")
ax.set_ylabel("Number of addresses owning n tokens (log scale)")
ax.hist(highest["num_tokens"], bins=100, log=True)
plt.show()

In [20]:
# Rank-size (log-log) plot of token ownership.
# `highest` is sorted by num_tokens in descending order, so a row's position
# plus one is that owner's rank.
owner_ranks = np.arange(1, len(highest) + 1)

plt.figure(figsize=(8, 6))
plt.xlabel("Log of rank of token owner")
plt.ylabel("Log of number of tokens owned")
# Vectorised log of the ranks instead of a per-element Python list comprehension.
plt.plot(np.log(owner_ranks), np.log(highest["num_tokens"]))
# Call show() like the other plotting cells so the cell renders the figure
# rather than echoing the Line2D repr.
plt.show()

In [21]:
# Count the rows (tokens) for every (collection address, owner) pair.
ownership_counts = owners.groupby(["nft_address", "owner"], as_index=False).size()
contract = ownership_counts.rename(columns={"size": "num_tokens"})
contract.head()

In [23]:
# Ownership entropy per collection.
# For each NFT contract: H = -sum_i p_i * log2(p_i), where p_i is the share of
# that contract's counted tokens held by owner i (so H is measured in bits).

# Group on the scalar column name. With a one-element list as the grouper,
# iterating the groupby yields 1-tuple keys on pandas >= 2.0, which would turn
# the keys of `entropies` (and the later "nft_address" column) into tuples.
contract_group = contract.groupby("nft_address")

# Each owner's share of its collection, computed for all rows at once rather
# than by adding columns to every per-group frame inside a Python loop.
ownership_share = contract["num_tokens"] / contract_group["num_tokens"].transform("sum")
entropy_terms = -ownership_share * np.log2(ownership_share)

# Plain dict {nft_address: entropy}; the cells below read it through
# .values() and .items().
entropies = entropy_terms.groupby(contract["nft_address"]).sum().to_dict()

In [26]:
# Distribution of ownership entropy across NFT collections.
plt.figure(figsize=(8,6))
# Plain string: the original f-string had no placeholders.
plt.xlabel("Ownership entropy of NFT collection")
plt.ylabel("Number of NFT collections")
# Materialise the dict view as a list so hist() receives an ordinary sequence.
plt.hist(list(entropies.values()), bins=80)
plt.show()

In [27]:
# (address, entropy) pairs ordered from highest to lowest entropy, then
# tabulated as a two-column frame.
sorted_entropies = sorted(entropies.items(), key=lambda pair: pair[1], reverse=True)
entropies_df = pd.DataFrame.from_records(
    sorted_entropies,
    columns=["nft_address", "entropy"],
)

In [28]:
# Collections with the highest ownership entropy (entropies_df is built from a
# list sorted by entropy in descending order, so they come first).
entropies_df.head()

In [29]:
# Collections with the lowest ownership entropy (the end of the descending
# ordering). A collection held entirely by a single address has entropy 0.
entropies_df.tail()

In [30]:
# Lowest-ranked collections among those whose entropy is still above 2 bits.
above_two_bits = entropies_df["entropy"] > 2
entropies_df[above_two_bits].tail()