# Napoved števila dni, ki jih živali preživijo v zavetišču
S pomočjo linearne regresije in naključnega gozda (random forest) smo napovedali število dni, ki jih bodo živali preživele v zavetišču, glede na različne atribute.

### Matrika

In [1]:
import numpy as np
from csv import DictReader

# Feature matrix
# row    -> one animal, indexed by its id
# column -> days in shelter (0+), type (cat = 0, dog = 1), sex (male = 0, female = 1),
#           neutered (0 = no, 1 = yes), vaccinated (0 = no, 1 = yes),
#           microchipped (0 = no, 1 = yes), image cluster (0-15)
matrika = np.zeros((626, 7), dtype=np.float32)

# Categorical encodings. A value that is not listed raises KeyError, so the row is
# skipped instead of silently inheriting the code of the previously processed animal.
KODA_TIPA = {"muc": 0, "pes": 1}
KODA_SPOLA = {"Moški": 0, "Ženski": 1}

# Errors that mark one malformed row: missing column (KeyError), non-numeric text
# (ValueError), short row whose field is None (TypeError), id outside the matrix (IndexError).
NAPAKE_VRSTICE = (KeyError, ValueError, TypeError, IndexError)

# Number of rows skipped per input file, kept for inspection after the cell has run.
preskocene = {"zivali.csv": 0, "veterinarska_oskrba.csv": 0, "embedded-slike.csv": 0}


def indeks_vrstice(vrednost):
    """Convert a raw id field into a valid row index of `matrika`.

    Raises ValueError/TypeError when the field is not an integer and IndexError
    when the id lies outside the matrix, so a negative id can no longer wrap
    around and overwrite a row at the end of the matrix.
    """
    indeks = int(vrednost)
    if not 0 <= indeks < matrika.shape[0]:
        raise IndexError(f"id {indeks} is outside the matrix")
    return indeks


# Columns 0-2: days in shelter, animal type, sex.
with open('zivali.csv', 'rt', encoding='utf-8', newline='') as datoteka:
    for row in DictReader(datoteka):
        try:
            vrstica = indeks_vrstice(row["id"])
            st_dni = max(int(row["cas_v_zavetiscu"]), 0)  # negative durations are invalid -> 0
            zival = KODA_TIPA[row["tip"]]
            s = KODA_SPOLA[row["spol"]]
        except NAPAKE_VRSTICE:
            preskocene["zivali.csv"] += 1
            continue
        matrika[vrstica, 0] = st_dni
        matrika[vrstica, 1] = zival
        matrika[vrstica, 2] = s

# Columns 3-5: neutered, vaccinated, microchipped.
with open('veterinarska_oskrba.csv', 'rt', encoding='utf-8', newline='') as datoteka:
    for row in DictReader(datoteka):
        try:
            vrstica = indeks_vrstice(row["id"])
            kastriran = int(row["sterilizirana_kastrirana"])
            cepljen = int(row["cepljena"])
            cipiran = int(row["cipirana"])
        except NAPAKE_VRSTICE:
            preskocene["veterinarska_oskrba.csv"] += 1
            continue
        matrika[vrstica, 3] = kastriran
        matrika[vrstica, 4] = cepljen
        matrika[vrstica, 5] = cipiran

# Column 6: image cluster.
with open('embedded-slike.csv', 'rt', encoding='utf-8', newline='') as datoteka:
    gruce = DictReader(datoteka)
    # Skip the two rows that follow the header line.
    # NOTE(review): presumably extra type/role header rows of the export - confirm.
    next(gruce)
    next(gruce)
    for row in gruce:
        try:
            vrstica = indeks_vrstice(row["image name"])
            gruca = int(row["Cluster"][1:])  # drop the one-character prefix before the number
        except NAPAKE_VRSTICE:
            preskocene["embedded-slike.csv"] += 1
            continue
        matrika[vrstica, 6] = gruca

print("Primer vrstice v matriki:")
print("st dni, tip, spol, kastriran, cepljen, cipiran, gruca")
print(matrika[307, :])

Primer vrstice v matriki:
st dni, tip, spol, kastriran, cepljen, cipiran, gruca
[155.   0.   0.   1.   1.   1.  12.]


### Linearna regresija

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Column 0 of the matrix is the prediction target (days in the shelter);
# every remaining column is an input feature.
X, y = matrika[:, 1:], matrika[:, 0]

# Hold out 15 % of the rows for evaluation; the fixed seed keeps the split reproducible.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.15, random_state=42)

# fit() returns the estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Mean absolute error on the held-out rows, rounded to two decimals for display.
mae = round(mean_absolute_error(y_test, y_pred), 2)

print("Mean Absolute Error z linearno regresijo:", mae, "dni napake.")

Mean Absolute Error z linearno regresijo: 68.66 dni napake.


In [3]:
# One hand-built sample in the feature order of matrix columns 1-6:
# type, sex, neutered, vaccinated, microchipped, image cluster.
# NOTE(review): sex is 1 here, which the matrix legend maps to female, while the
# label printed below says "maček" (tomcat) - confirm the intended encoding.
nov_primer = np.asarray([0, 1, 1, 1, 1, 13])
X_novi = nov_primer[np.newaxis, :]  # the model expects a 2-D array: one row, six features
napoved = model.predict(X_novi)

print("InkyLinx (maček):")
print("Napovedana vrednost:", round(napoved[0], 2), "dni")
print("Prava vrednost: 211 dni")  # example: https://www.zavetisce-horjul.net/zivali1/muc/InkyLinx.php

InkyLinx (maček):
Napovedana vrednost: 196.55 dni
Prava vrednost: 211 dni


### Ansambli, random forest

In [4]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

# Same 85/15 split and seed as for the linear model, so both errors are
# measured on identical held-out rows.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.15, random_state=42)

# Ensemble of 150 trees; the seed makes the fitted forest reproducible.
# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestRegressor(n_estimators=150, random_state=42).fit(X_train, y_train)

y_pred = model.predict(X_test)

# Mean absolute error on the held-out rows, rounded to two decimals for display.
mae = round(mean_absolute_error(y_test, y_pred), 2)

print("Mean Absolute Error z random forest in ansambli:", mae, "dni napake.")

Mean Absolute Error z random forest in ansambli: 60.76 dni napake.


In [5]:
# One hand-built sample in the feature order of matrix columns 1-6:
# type, sex, neutered, vaccinated, microchipped, image cluster.
# NOTE(review): sex is 1 here, which the matrix legend maps to female, while the
# label printed below says "maček" (tomcat) - confirm the intended encoding.
nov_primer = np.asarray([0, 1, 1, 1, 1, 13])
X_novi = nov_primer[np.newaxis, :]  # the model expects a 2-D array: one row, six features
napoved = model.predict(X_novi)

print("InkyLinx (maček):")
print("Napovedana vrednost:", round(napoved[0], 2), "dni")
print("Prava vrednost: 211 dni")  # example: https://www.zavetisce-horjul.net/zivali1/muc/InkyLinx.php

InkyLinx (maček):
Napovedana vrednost: 333.01 dni
Prava vrednost: 211 dni
