# Predicting iPhone Prices Using ARD Regression
* Data Cleaning and Rearranging
* Plotting
* Feature Selection
* ARD Regression: 92.74% (R² score on the test split)

In [None]:
import numpy as np 
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import re
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the listings CSV from the Kaggle input directory, then show its
# dimensions and the first rows as a quick sanity check.
csv_path = "/kaggle/input/used-iphone-11-prices-in-us/filename.csv"
df = pd.read_csv(csv_path)
print(df.shape)
df.head()

## Data Cleaning and Rearranging

### Drop rows that contain extraneous values

In [None]:
# Keep only rows whose GB and Color fields are not the literal string "0"
# (presumably a placeholder for values that could not be extracted -- confirm
# against the dataset description).
# The explicit .copy() makes the result an independent frame, so later
# in-place column writes do not target a slice of the originally loaded data.
df = df[(df["GB"] != "0") & (df["Color"] != "0")].copy()

### Rearrange column values
1. Set the Description column to "iPhone 11": we know every phone is an iPhone 11, and the required information has already been extracted from it.
2. Convert the Price values to integers so that they are easier for the algorithm to work with.
3. Remove 'GB' from GB column

In [None]:
# Normalise the columns with vectorized operations instead of a per-row loop.

# Every listing gets the same description string.
df["Description"] = "iPhone 11"

# Round prices to the nearest whole number and store them as integers.
df["Price"] = df["Price"].round().astype(int)

# Strip the literal "GB" unit so the storage size becomes a plain integer;
# regex=False makes the replacement a literal substring match.
df["GB"] = df["GB"].str.replace("GB", "", regex=False).astype(int)

df.head()

### Check unique colors

In [None]:
# Distinct color names found in the data, in sorted order.
color = np.sort(df["Color"].unique())
color

### Plot using Bar Plot

In [None]:
# Horizontal bar chart of how many listings there are per color.
df["Color"].value_counts().plot.barh()

### Replace color strings with integer codes

In [None]:
# Assign each distinct color name an integer code, numbered by sorted order.
color = np.sort(df["Color"].unique())
col_dict = {name: code for code, name in enumerate(color)}

# Write the encoded column back explicitly. Calling an in-place method on the
# Series returned by `df.Color` is chained assignment: it is deprecated and
# leaves `df` unchanged when pandas Copy-on-Write is active.
df["Color"] = df["Color"].map(col_dict)
print(col_dict)

# NOTE(review): these codes impose an arbitrary numeric order on a nominal
# feature; a linear model will treat them as ordinal. Consider one-hot
# encoding if that is not intended.

In [None]:
# Preview the first five rows to check the current state of the frame.
df.head(n=5)

## Plot

In [None]:
# Horizontal bar chart of listing counts per value of the "Pro?" column.
df["Pro?"].value_counts().plot.barh()

In [None]:
# Horizontal bar chart of listing counts per value of the "Unlock?" column.
df["Unlock?"].value_counts().plot.barh()

In [None]:
# Horizontal bar chart of listing counts per value of the "Max?" column.
df["Max?"].value_counts().plot.barh()

In [None]:
# Horizontal bar chart of listing counts per storage size.
df["GB"].value_counts().plot.barh()

In [None]:
# Histogram of the price column (default binning).
df["Price"].hist()

## Feature Selection

In [None]:
# Target: the listing price.
y = df["Price"]
# Features: every remaining column except the target and the Description text.
x = df.drop(["Description", "Price"], axis=1)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

## Train using ARDRegression

In [None]:
from sklearn import linear_model

# Fit an ARD (Automatic Relevance Determination) regressor on the training
# split; fit() returns the estimator itself, so the call can be chained.
clf = linear_model.ARDRegression().fit(x_train, y_train)

# Predictions for the held-out rows.
pred = clf.predict(x_test)

# For a regressor, score() is the coefficient of determination (R^2) on the
# given data, not a classification accuracy; it is printed here as a percent.
accuracy = clf.score(x_test, y_test)
print(accuracy * 100, '%')