# **Inference**

In [4]:
from hdfs import InsecureClient
import pandas as pd
import xgboost as xgb
import pickle

# Set up HDFS client
# NOTE(review): endpoint URL and user name are hardcoded here and repeated in later cells;
# consider defining them once in a configuration cell.
hdfs_client = InsecureClient('http://localhost:9870', user='my_user')

# Path to the .csv file in HDFS
csv_path = '/user/Datasets/Healthcare-Diabetes.csv'

# Read the CSV file into a Pandas DataFrame
# (the HDFS reader is opened in text mode via encoding='utf-8' and handed straight to pandas)
with hdfs_client.read(csv_path, encoding='utf-8') as reader:
    csv_data = pd.read_csv(reader)

# Preprocess the data
# X is what the XGBoost inference cell passes to `xgb_model.predict`.
X = csv_data.drop(columns=['Id', 'Outcome'])  # Keep every column except 'Id' and 'Outcome' (presumably the row id and the label - confirm)
print(X.head())


   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  
0                     0.627   50  
1                     0.351   31  
2                     0.672   32  
3                     0.167   21  
4                     2.288   33  


In [2]:
# Load your XGBoost model
# WARNING: pickle.load can execute arbitrary code; only unpickle model files from a trusted source.
# NOTE(review): the recorded output of this cell is an XGBoost warning that the pickled
# configuration was generated by an older XGBoost version and recommends exporting the model
# with `Booster.save_model` from that version and loading it back in the current one.
with open('models/xgb_diabetes.pkl', 'rb') as f:
    xgb_model = pickle.load(f)


configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.



In [7]:
# Perform inference
# Depends on `X` (built in the CSV-loading cell) and `xgb_model` (built in the model-loading
# cell); both cells must have been run first.
# NOTE(review): the recorded execution counts of those cells are not sequential, so verify
# this cell with Restart Kernel -> Run All.
predictions_xgb = xgb_model.predict(X)
print(predictions_xgb)


[1 1 1 ... 1 1 1]


In [3]:
from hdfs import InsecureClient
from PIL import Image
import io
import numpy as np
from tensorflow.keras.models import load_model

# Set up HDFS client
hdfs_client = InsecureClient('http://localhost:9870', user='my_user')

# Path to the image directory in HDFS
image_dir_hdfs = '/user/Datasets/catdog'

# Load your ResNet50 model
resnet50_model = load_model('models/resnet50_cats_vs_dogs.h5')

# Read and preprocess a single image (path is built from the dataset directory above)
image_path = f'{image_dir_hdfs}/Cat/0.jpg'

# Pull the raw bytes while the HDFS stream is open; decoding happens afterwards so the
# connection is not held during image processing.
with hdfs_client.read(image_path) as reader:
    image_bytes = reader.read()

# Force a 3-channel RGB image: grayscale, palette, RGBA or CMYK files would otherwise
# produce an array whose last dimension is not 3 and break the (1, 224, 224, 3) input shape.
img = Image.open(io.BytesIO(image_bytes)).convert('RGB')
img = img.resize((224, 224))  # Resize to match ResNet50 input size

# Scale pixel values to [0, 1] and add the batch dimension.
# NOTE(review): assumes the model was trained on inputs scaled by 1/255; if training used
# keras.applications.resnet50.preprocess_input instead, apply that here - confirm.
img_array = np.expand_dims(np.array(img) / 255.0, axis=0)




In [4]:
# Perform inference
# Depends on `resnet50_model` and `img_array` from the image-loading cell.
# The recorded output is a single value for the single input image; which class a high or
# low value corresponds to is not shown in this notebook - TODO confirm against training code.
prediction = resnet50_model.predict(img_array)
print(prediction)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 39s/step
[[0.43045753]]


## **TESTING**

In [2]:
import subprocess
import pandas as pd
from hdfs import InsecureClient
import os

In [3]:
# HDFS client for the testing cells below (same endpoint and user as `hdfs_client` in the
# inference section, bound to a different name).
client_hdfs = InsecureClient('http://localhost:9870', user='my_user')

In [4]:
# Load the diabetes CSV from HDFS; index_col=0 makes the first CSV column the index
# (the recorded display of `df` shows that column is 'Id').
with client_hdfs.read('/user/Datasets/Healthcare-Diabetes.csv', encoding = 'utf-8') as reader:
  df = pd.read_csv(reader,index_col=0)

In [5]:
# Display the loaded diabetes frame (a bare last expression uses the notebook's rich display).
df

Unnamed: 0_level_0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,6,148,72,35,0,33.6,0.627,50,1
2,1,85,66,29,0,26.6,0.351,31,0
3,8,183,64,0,0,23.3,0.672,32,1
4,1,89,66,23,94,28.1,0.167,21,0
5,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
2764,2,75,64,24,55,29.7,0.370,33,0
2765,8,179,72,42,130,32.7,0.719,36,1
2766,6,85,78,0,0,31.2,0.382,42,0
2767,0,129,110,46,130,67.1,0.319,26,1


In [6]:
# Load train.csv from HDFS; index_col=0 makes the first CSV column the index.
# NOTE(review): the recorded display of `df2` shows that first column is 'label', so the
# label values become a non-unique index and 'label' is no longer a regular column.
# Confirm this is intended before using `df2` for training or evaluation.
with client_hdfs.read('/user/Datasets/train.csv', encoding = 'utf-8') as reader:
  df2 = pd.read_csv(reader,index_col=0)

In [7]:
# Display the loaded train.csv frame (pixel columns, indexed by 'label' per the recorded output).
df2

Unnamed: 0_level_0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
