In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression



In [8]:
# Load the per-machine dataset from computers.csv and display the frame.
# The later cells read its Name, Date, Price, Flops and Quantity columns.
computers_df = pd.read_csv('computers.csv')
computers_df

Unnamed: 0,Name,Date,Price,Flops,Quantity
0,ENIAC,1945,7002275.0,385,1
1,IBM 7030 Stretch,1961,67380000.0,1200,9
2,Cray 1,1975,33700000.0,160000000,100
3,Cray 2,1985,77299124.0,1900000000,27
4,Pentium M 730,2003,293.98,3900000000,1000000
5,Sony PlayStation 4,2013,444.39,1840000000000,116900000
6,AMD Ryzen 3600,2019,201.46,3600000000000,50000
7,Xbox Series X,2020,505.16,12100000000000,12000000


In [61]:
# Pull the raw columns used as regression inputs/targets:
#   Q    - 'Quantity' as a plain list
#   P    - 'Price' as a plain list
#   O    - 'Flops' scaled down by 10**6 (kept as a pandas Series)
#   logO - natural log of O
Q, P = (computers_df[col].tolist() for col in ('Quantity', 'Price'))
O = computers_df['Flops'] / 1_000_000
logO = np.log(O)

In [62]:
# Technology features for every computer, in the order of computers_df['Name']:
#   F[i] - list of 0/1 fabrication flags returned by find_fabrications
#   M[i] - transistor dimension returned by find_transistor
# NOTE(review): both helpers are defined in a cell further down this notebook,
# so this cell only runs once that cell has been executed.
computers = computers_df['Name'].tolist()
F, M = [], []
for name in computers:
    # First row whose Name matches supplies the production year.
    is_this_computer = computers_df['Name'] == name
    year = computers_df.loc[is_this_computer, 'Date'].values[0]
    F.append(find_fabrications(year))
    M.append(find_transistor(year))
F, M

([[0, 0, 0, 0, 0, 0],
  [1, 0, 0, 0, 0, 0],
  [1, 0, 0, 1, 0, 0],
  [1, 1, 0, 1, 1, 0],
  [1, 1, 1, 1, 1, 1],
  [1, 1, 1, 1, 1, 1],
  [1, 1, 1, 1, 1, 1],
  [1, 1, 1, 1, 1, 1]],
 [0, 0, 6.0, 1.0, 0.09, 0.022000000000000002, 0.006999999999999999, 0.005])

In [70]:
# Stack the predictors into X with one feature per row:
# quantity, transistor dimension, the six fabrication flags, then O.
# NOTE(review): F is rebound to its own transpose, so running this cell a
# second time without rebuilding F flips it back and changes X.
F = np.transpose(np.array(F))
P = np.array(P)
feature_rows = [Q, M] + [F[i] for i in range(6)] + [O]
X = np.array(feature_rows)
X.shape, P.shape

((9, 8), (8, 1))

In [81]:
# Degree-2 polynomial regression of price (P) on the stacked features (X).
# LinearRegression is already imported in the notebook's first cell.
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(2)
# X is built with one feature per row, so transpose it into the
# (samples, features) layout that fit_transform expects.
X_poly = poly.fit_transform(X.T)
regr = LinearRegression()
regr.fit(X_poly, P)
# In-sample R^2 of the polynomial fit. It is stored as `poly_r2` rather than
# `r2_score`: binding a float to `r2_score` shadows sklearn.metrics.r2_score,
# which this notebook imports and calls as a function, and re-running this
# cell after that import would make those calls fail.
poly_r2 = regr.score(X_poly, P)
regr.coef_

array([[-3.91701089e+04,  3.17957107e+06, -2.61182323e+03,
         5.73835561e+05,  5.81527152e+01, -1.39616703e+00,
        -3.89247377e+02,  5.60780088e+01, -1.38101617e+00,
         3.63567349e+04, -1.21735859e-03, -3.89791845e+05,
         3.75340769e+06, -1.37008432e+06, -1.37163571e+06,
        -1.41461691e+06, -1.37008432e+06, -1.37163571e+06,
         6.17546910e-02, -1.59742541e+04, -2.61461746e+03,
         5.73346305e+01, -1.24394042e-01, -2.61461746e+03,
         5.73346305e+01, -1.24394042e-01, -3.18796899e+05,
         5.73835774e+05,  5.60780088e+01, -1.38101617e+00,
        -3.89247315e+02,  5.60780088e+01, -1.38101617e+00,
         3.65777699e+04,  5.60780088e+01, -1.38101617e+00,
         5.60780088e+01,  5.60780088e+01, -1.38101617e+00,
         1.07140740e+05, -1.38101617e+00, -1.38101617e+00,
        -1.38101617e+00, -1.38101617e+00, -2.03140431e+03,
        -3.89247315e+02,  5.60780088e+01, -1.38101617e+00,
         3.58886667e+04,  5.60780088e+01, -1.38101617e+0

In [25]:
# Load the MOSFET process-node table from mosfet.csv and display it.
# find_transistor reads its 'invention' and 'dimension (mircons)' columns.
mosfet_df = pd.read_csv('mosfet.csv')
mosfet_df

Unnamed: 0,dimension (mircons),invention
0,10.0,1971
1,6.0,1974
2,3.0,1977
3,1.5,1981
4,1.0,1984
5,0.8,1987
6,0.6,1990
7,0.35,1993
8,0.25,1996
9,0.18,1999


In [26]:
# Load the fabrication-method table from fabrication.csv and display it.
# find_fabrications reads its ' invention' column (note the leading space).
fabrication_df = pd.read_csv('fabrication.csv')
fabrication_df

Unnamed: 0,method,invention
0,photolithography,1958
1,e-beam lithography,1980
2,focused ion beam,1995
3,liquid metal ion source,1970
4,scanning probe lithography,1981
5,dip pen lithography,1999


In [5]:
# Names of all computers, in row order; the feature-building loops iterate this list.
computers = computers_df['Name'].tolist()
computers

['ENIAC',
 'IBM 7030 Stretch',
 'Cray 1',
 'Cray 2',
 'Pentium M 730',
 'Sony PlayStation 4',
 'AMD Ryzen 3600',
 'Xbox Series X']

In [29]:
# Inspect the invention years; the column label starts with a space.
fabrication_df[' invention'].tolist()

[1958, 1980, 1995, 1970, 1981, 1999]

In [34]:
def find_transistor(date):
    """Return the transistor dimension of the newest process node available at ``date``.

    Scans ``mosfet_df`` for the row with the latest ``invention`` year that is
    not after ``date`` and returns that row's ``dimension (mircons)`` value.
    The year and the dimension are tracked separately, so the result does not
    depend on the rows being sorted by year or on the frame's index labels.

    Parameters
    ----------
    date : int
        Year to look up.

    Returns
    -------
    float or int
        Dimension of the most recent node invented on or before ``date``;
        ``0`` when every node in ``mosfet_df`` was invented after ``date``.
        If several rows share the winning year, the first one wins.
    """
    years = mosfet_df['invention'].values
    dimensions = mosfet_df['dimension (mircons)'].values

    latest_year = None  # invention year of the best candidate so far
    closest = 0         # its dimension; 0 doubles as the "nothing yet" result
    for year, dimension in zip(years, dimensions):
        if year > date:
            # Not invented yet at `date`.
            continue
        if latest_year is None or year > latest_year:
            latest_year = year
            closest = dimension
    return closest

def find_fabrications(date):
    """Flag which fabrication methods were invented before ``date``.

    Returns a list with one entry per row of ``fabrication_df`` (in row order):
    1 when that row's ``' invention'`` year is strictly earlier than ``date``,
    0 otherwise.
    """
    flags = []
    for invented in fabrication_df[' invention'].tolist():
        if date > invented:
            flags.append(1)
        else:
            flags.append(0)
    return flags

In [120]:
# Build two normalised regression problems, one row per computer:
#   X   / y   - every term divided by the machine's price
#   X_a / y_a - every term divided by the quantity
# Each feature row is [transistor, *fabrication flags, 1] / divisor, followed by
# a constant 1; the target is Flops / divisor.
# NOTE(review): the column labels here carry a leading space (' Date', ...),
# whereas an earlier cell reads 'Date' without one - confirm which matches the CSV.
X, X_a = [], []
y, y_a = [], []
for name in computers:
    selected = computers_df['Name'] == name
    # First matching row's value for each column needed below.
    record = {
        column: computers_df.loc[selected, column].values[0]
        for column in (' Date', ' Price', ' Quantity', ' Flops')
    }
    price, quantity, flops = record[' Price'], record[' Quantity'], record[' Flops']

    transistor = find_transistor(record[' Date'])
    fabrications = find_fabrications(record[' Date'])
    numerators = [transistor, *fabrications, 1]

    X.append([term / price for term in numerators] + [1])
    X_a.append([term / quantity for term in numerators] + [1])
    y.append(flops / price)
    y_a.append(flops / quantity)

In [121]:
# Inspect the price-normalised feature rows and targets.
X, y

([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.4281072937009756e-07, 1],
  [0.0,
   1.4841199168892847e-08,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   1.4841199168892847e-08,
   1],
  [5.857566765578635e-05,
   2.9673590504451037e-08,
   0.0,
   0.0,
   2.9673590504451037e-08,
   0.0,
   0.0,
   2.9673590504451037e-08,
   1],
  [2.5666526311475404e-05,
   1.2936757213445266e-08,
   1.2936757213445266e-08,
   0.0,
   1.2936757213445266e-08,
   1.2936757213445266e-08,
   0.0,
   1.2936757213445266e-08,
   1],
  [6.813388665895639,
   0.003401591945030274,
   0.003401591945030274,
   0.003401591945030274,
   0.003401591945030274,
   0.003401591945030274,
   0.003401591945030274,
   0.003401591945030274,
   1],
  [4.527554625441617,
   0.002250275658768199,
   0.002250275658768199,
   0.002250275658768199,
   0.002250275658768199,
   0.002250275658768199,
   0.002250275658768199,
   0.002250275658768199,
   1],
  [10.016876799364638,
   0.0049637645190112175,
   0.0049637645190112175,
   0.004963

In [122]:
# Inspect the quantity-normalised feature rows and targets.
X_a, y_a

([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1],
  [0.0, 0.1111111111111111, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1111111111111111, 1],
  [19.74, 0.01, 0.0, 0.0, 0.01, 0.0, 0.0, 0.01, 1],
  [73.48148148148148,
   0.037037037037037035,
   0.037037037037037035,
   0.0,
   0.037037037037037035,
   0.037037037037037035,
   0.0,
   0.037037037037037035,
   1],
  [0.002003, 1e-06, 1e-06, 1e-06, 1e-06, 1e-06, 1e-06, 1e-06, 1],
  [1.7211291702309665e-05,
   8.55431993156544e-09,
   8.55431993156544e-09,
   8.55431993156544e-09,
   8.55431993156544e-09,
   8.55431993156544e-09,
   8.55431993156544e-09,
   8.55431993156544e-09,
   1],
  [0.04036, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 1],
  [0.00016833333333333332,
   8.333333333333334e-08,
   8.333333333333334e-08,
   8.333333333333334e-08,
   8.333333333333334e-08,
   8.333333333333334e-08,
   8.333333333333334e-08,
   8.333333333333334e-08,
   1]],
 [385.0,
  133.33333333333334,
  1600000.0,
  70370370.37037037,
  3900.0,
  15739.948674080411,


In [123]:
# Ordinary least squares on the price-normalised data.
# Each row of X already ends with a constant 1, and LinearRegression also fits
# its own intercept by default, so that last column is redundant.
reg = LinearRegression().fit(X, y)

In [124]:
# Predictions on the same rows the model was fitted on (no held-out data).
y_pred = reg.predict(X)

In [125]:
# One coefficient per feature column; the last entry belongs to the constant-1 column.
reg.coef_

array([ 3.91201754e+11, -9.54713746e+17, -3.44704269e+17,  6.11134022e+17,
        5.31787730e+17, -3.44704269e+17,  6.11134022e+17, -1.10722426e+17,
        0.00000000e+00])

In [126]:
from sklearn.manifold import TSNE
from sklearn.metrics import r2_score

In [127]:
# In-sample R^2: y_pred was produced from the very rows used for fitting,
# so this measures fit quality, not predictive performance.
r2_score(y, y_pred)

0.8543461346783179

In [135]:
# Same model on the quantity-normalised data; predictions are again made on
# the training rows themselves.
reg2 = LinearRegression().fit(X_a, y_a)
y2_pred = reg2.predict(X_a)

In [136]:
# One coefficient per feature column; the last entry belongs to the constant-1 column.
reg2.coef_

array([ 2.61221885e+11, -2.67131432e+06, -1.30522717e+12, -2.65003090e+12,
       -5.15651871e+14, -1.30522730e+12, -2.65003085e+12, -3.33631165e+05,
        0.00000000e+00])

In [137]:
# In-sample R^2 for the quantity-normalised fit (evaluated on the training rows).
r2_score(y_a, y2_pred)

0.9999703208029341