This notebook attempts to recreate the following plot, which illustrates
Mamajek's Law, the predicted doubling time of the number of discovered exoplanets:
https://figshare.com/articles/figure/Cumulative_Number_of_Exoplanets_Discoveries_Versus_Time/4057704

See the end of the notebook for a comparison of the two plots.

In [None]:
# Creating the full custom plot
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import matplotlib
from matplotlib.ticker import MaxNLocator

def format_mamajek_plot():
    """Create and style the figure used for the Mamajek's Law plot.

    Sets the global matplotlib font, builds a square figure holding a single
    set of axes with a logarithmic y axis, draws inward-facing ticks on every
    side with year labels on both top and bottom, and attaches the caption
    and doubling-time text to the first and last data points.

    Reads the module-level ``years`` and ``counts`` Series, so both must be
    populated before this function is called.

    Returns:
        The matplotlib Axes on which the data and fits should be drawn.
    """
    caption_label = (
        "Data from NASA Exoplanet Archive\n"
        "http://exoplanetarchive.ipac.caltech.edu\n"
        "C. Moran (NASA ExEP) 04/30/2021\n"
    )
    doubling_time_label = (
        "Exoplanet\n"
        "Discovery\n"
        "Doubling Time:\n"
        "27 months\n"
    )

    # Setting global font parameters (alternatives tried: monospace, sans-serif)
    figsize = 6
    font = {
        'family': 'serif',
        'weight': 'bold',
        'size': 11,
    }
    matplotlib.rc('font', **font)
    figure = matplotlib.pyplot.figure(figsize=(figsize, figsize))
    ax = figure.add_subplot(1, 1, 1, position=[0.2, 0.15, 0.75, 0.75])

    ax.set_yscale('log')
    ax.set_xlabel('Year')
    ax.set_ylabel("Cumulative # Exoplanets")
    # Show plain integers (1, 10, 100, ...) instead of powers of ten.
    # FuncFormatter callbacks are expected to return the label as a string.
    ax.get_yaxis().set_major_formatter(
        matplotlib.ticker.FuncFormatter(lambda x, p: str(int(x))))

    # Ticks on all four sides, pointing inward, with year labels top and bottom.
    ax.yaxis.set_ticks_position('both')
    ax.xaxis.set_ticks_position('both')
    ax.tick_params(axis="x", bottom=True, top=True, labelbottom=True, labeltop=True)
    ax.tick_params(which='both', direction='in')
    ax.xaxis.set_minor_locator(plt.MultipleLocator(1))
    ax.xaxis.set_major_locator(plt.MultipleLocator(5))

    # Text is positioned in offset points relative to the first / last data point.
    ax.annotate(caption_label, xytext=(80, -20), textcoords='offset points',
                xy=(years.iloc[0], counts.iloc[0]), size=10)
    ax.annotate(doubling_time_label, xytext=(-310, 0), textcoords='offset points',
                xy=(years.iloc[-1], counts.iloc[-1]), size=10)
    return ax

def add_exoplanet_labels(ax):
    """Annotate notable discoveries and the Kepler era on the plot.

    For each planet of interest, looks up its discovery year in the
    module-level ``planetary_systems`` table, finds the cumulative count for
    that year in the module-level ``counts`` Series, and writes a vertical
    label at that point. Planets that cannot be located are reported with a
    printed message and skipped.

    Args:
        ax: The matplotlib Axes to annotate.
    """
    # Maps the planet name used for the lookup to the text shown on the plot.
    planets_of_interest_with_labels = {
        'HD 114762 b': 'HD 114762 b',
        'PSR B1257+12 c': 'B1257+12 Pulsar planets',
        '51 Peg b': '51 Peg b',
        'HD 209458 b': 'HD 209458 b',
        'OGLE-2003-BLG-235L b': 'OGLE 2003-BLG-235L',
        'HR 8799 b': 'HR 8799 bcd',
    }

    # ``counts`` holds one entry per year starting at the first discovery
    # year, so (year - begin_year) is the index of that year's count.
    begin_year = planetary_systems['disc_year'].min()
    for planet, planet_label in planets_of_interest_with_labels.items():
        # Only the lookup is guarded: an absent planet gives an empty
        # selection (IndexError) and an out-of-range year gives KeyError.
        try:
            matching_rows = planetary_systems[planetary_systems['pl_name'] == planet]
            disc_year_x = matching_rows['disc_year'].values[0]
            disc_year_y = counts[disc_year_x - begin_year]
        except (IndexError, KeyError):
            print("missing", planet, "double check it exists in data")
            continue

        ax.annotate(planet_label, xytext=(-2, 10), textcoords='offset points',
                    rotation=90, xy=(disc_year_x, disc_year_y))
        # add a special below label for fomalhaut b, anchored to the same point
        if planet == 'HR 8799 b':
            ax.annotate('Fomalhaut b', xytext=(-2, -85), textcoords='offset points',
                        rotation=90, xy=(disc_year_x, disc_year_y))

    kepler_label_year = 2009  # 2009-2018 Kepler years
    kepler_year_idx = kepler_label_year - begin_year
    ax.annotate('Kepler era', xytext=(0, 20), textcoords='offset points',
                xy=(kepler_label_year, counts[kepler_year_idx]), rotation=45)


# Load the NASA Exoplanet Archive planetary systems table
planetary_systems = pd.read_csv('/kaggle/input/nasa-exoplanet-archive-planetary-systems/PSCompPars_2021.04.27_16.49.16.csv')

# Build one entry per calendar year between the first and last discovery
# year: the year itself and the number of planets discovered up to and
# including that year.
begin_year = planetary_systems['disc_year'].min()
end_year = planetary_systems['disc_year'].max()
years = pd.Series(list(range(begin_year, end_year + 1)))
counts = pd.Series([
    planetary_systems[planetary_systems['disc_year'] <= yr].shape[0]
    for yr in range(begin_year, end_year + 1)
])

# Build the styled axes, then draw the cumulative counts as black dots
# joined by a thin black line
ax = format_mamajek_plot()
ax.scatter(years, counts, color='black')
ax.plot(years, counts, color='black', linewidth=1)

# Label notable planets to match
# https://figshare.com/articles/figure/Cumulative_Number_of_Exoplanets_Discoveries_Versus_Time/4057704
add_exoplanet_labels(ax)



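# This whole cell defines the trend lines and curve fits that can be overlaid on the plot:
# a fixed doubling-time line, least-squares fits, and fixed-coefficient log-linear relations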
def plot_doubling_months(doubling_time_months, anchor_to_year=None):
    """Draw a dashed red line that doubles every ``doubling_time_months``.

    The line starts at the anchor year with that year's cumulative count and
    adds one point per doubling period, stopping before the last year in the
    data. Reads the module-level ``years`` and ``counts`` Series and draws on
    the module-level ``ax``.

    Args:
        doubling_time_months: Doubling period of the line, in months.
        anchor_to_year: Year at which the line starts. Defaults to the first
            year in ``years``.

    Raises:
        ValueError: If ``anchor_to_year`` is given but not present in ``years``.
    """
    if anchor_to_year is None:
        anchor_to_year = years[0]
        begin_year_idx = 0
    else:
        # anchor_to_year is a single year, so compare against it directly.
        matching_years = years[years == anchor_to_year]
        if matching_years.empty:
            raise ValueError(f"anchor_to_year {anchor_to_year} is not in the data")
        begin_year_idx = matching_years.index[0]
    # Start the line at the anchor year's count, not always the first count.
    begin_count = counts[begin_year_idx]
    doubling_time_years = doubling_time_months / 12.

    # this is going to be our data to plot
    last_year = years.iloc[-1]
    n_steps = int(np.ceil((last_year - anchor_to_year) / doubling_time_years))
    doublings = np.arange(n_steps)
    years_doubling_line = anchor_to_year + doublings * doubling_time_years
    # 2.0 keeps the powers in floating point, as they were with a float step count.
    counts_doubling_line = 2.0**doublings * begin_count
    ax.plot(years_doubling_line, counts_doubling_line, linestyle='--', linewidth=1, color='red')


def fit_exponential():
    """Fit counts = a*exp(b*t) and overlay the result as a dashed blue line.

    ``t`` is the number of years since the first discovery year. Reads the
    module-level ``begin_year``, ``end_year``, ``years`` and ``counts`` and
    draws on the module-level ``ax``.
    """
    from scipy.optimize import curve_fit

    def _exp_model(t, amplitude, rate):
        return amplitude*np.exp(rate*t)

    # Years re-based so that the first discovery year is t = 0
    year_offsets = range(0, end_year-begin_year+1)
    best_fit, covariance = curve_fit(
        f=_exp_model,
        xdata=year_offsets,
        ydata=counts,
        p0=[0, 0],
        bounds=(-np.inf, np.inf),
    )
    fitted_counts = _exp_model(year_offsets, *best_fit)
    # Fit diagnostics; computed here but not used by the plot
    param_sigmas = np.sqrt(np.diag(covariance))
    residuals = counts - fitted_counts
    ax.plot(years, fitted_counts, linestyle='--', linewidth=2, color='blue')

def fit_line_manual_log10(restrict_years=None):
    """Fit counts = 10**(a + b*year) on a year window and overlay it in orange.

    The fit uses only the data between the two years in ``restrict_years``
    (inclusive), or all of the data when no window is given, but the fitted
    curve is drawn across every year. The fitted parameters are printed.
    Reads the module-level ``years`` and ``counts`` and draws on ``ax``.

    Args:
        restrict_years: Optional ``(first_year, last_year)`` pair limiting
            the data used for the fit.
    """
    from scipy.optimize import curve_fit

    def _log_linear(year, intercept, slope):
        return 10**(intercept+slope*year)

    if not restrict_years:
        first_idx = 0
        last_idx = len(years)-1
    else:
        first_idx = years[years == restrict_years[0]].index[0]
        last_idx = years[years == restrict_years[1]].index[0]
        print(first_idx, last_idx)

    # Inclusive window over both Series
    window = slice(first_idx, last_idx+1)
    years_in_window = years[window]
    counts_in_window = counts[window]

    best_fit, covariance = curve_fit(
        f=_log_linear,
        xdata=years_in_window,
        ydata=counts_in_window,
        p0=[-262.71118,  0.13209],
        bounds=(-np.inf, np.inf),
    )
    # Fit diagnostics; computed here but not used by the plot
    param_sigmas = np.sqrt(np.diag(covariance))
    residuals = counts_in_window - _log_linear(years_in_window, *best_fit)
    print([f"{value:.7f}" for value in best_fit])
    ax.plot(years, _log_linear(years, *best_fit), linestyle='--', linewidth=2, color='orange')

def fit_log10():
    """Fit counts = 10**(a + b*year) to all of the data and overlay it in blue.

    Reads the module-level ``years`` and ``counts`` and draws a dashed line
    on the module-level ``ax``.
    """
    from scipy.optimize import curve_fit

    def _log_linear(year, intercept, slope):
        return 10**(intercept+slope*year)

    best_fit, covariance = curve_fit(
        f=_log_linear,
        xdata=years,
        ydata=counts,
        p0=[-262.71118,  0.13209],
        bounds=(-np.inf, np.inf),
    )
    fitted_counts = _log_linear(years, *best_fit)
    # Fit diagnostics; computed here but not used by the plot
    param_sigmas = np.sqrt(np.diag(covariance))
    residuals = counts - fitted_counts
    ax.plot(years, fitted_counts, linestyle='--', linewidth=2, color='blue')

def fit_mamajek_figshare():
    """Overlay log10(count) = -262.71118 + 0.13209*year as a dashed red line.

    Original author's note: this one is verified to work.
    Reads the module-level ``years`` and draws on the module-level ``ax``.
    """
    intercept, slope = -262.71118, 0.13209
    log_counts = intercept + slope*years
    ax.plot(years, 10**log_counts, linestyle='--', linewidth=2, color='red')
def fit_mamajek_2019():
    """Overlay log10(count) = -245.9906 + 0.1236755*year as a dashed red line.

    Reads the module-level ``years`` and draws on the module-level ``ax``.
    """
    intercept, slope = -245.9906, 0.1236755
    log_counts = intercept + slope*years
    ax.plot(years, 10**log_counts, linestyle='--', linewidth=2, color='red')
def fit_mamajek3():
    """Overlay log10(count) = -256.0817 + 0.129114349*year as a dashed red line.

    Reads the module-level ``years`` and draws on the module-level ``ax``.
    """
    intercept, slope = -256.0817, 0.129114349
    log_counts = intercept + slope*years
    ax.plot(years, 10**log_counts, linestyle='--', linewidth=2, color='red')
def fit_mamajek4():
    """Overlay log10(count) = 0.129114349*(year - 1989) as a dashed red line.

    Reads the module-level ``years`` and draws on the module-level ``ax``.
    """
    slope = 0.129114349
    years_since_1989 = years-1989
    log_counts = slope*years_since_1989
    ax.plot(years, 10**log_counts, linestyle='--', linewidth=2, color='red')
    
###
## Choose which overlay to draw; uncomment an alternative to compare it:
#fit_exponential()
#fit_mamajek_figshare() # this one match
#fit_log10()
#fit_line_manual_log10(restrict_years=(2008,2010))
plot_doubling_months(doubling_time_months=27)
# The saved image is available for download at /kaggle/working/mamajeks_law_2021.png
plt.savefig("mamajeks_law_2021.png", dpi=300)

Compare this plot to the original below, which cuts off around 2016:

![Image](https://ndownloader.figshare.com/files/6570840/preview/6570840/preview.jpg)