In [1]:
import pandas as pd

data = {'Feature1': ['10', '20', 'Thirty'],  # 'Thirty' is not a valid number
        'Feature2': [5.5, 6.7, 8.9]}

df = pd.DataFrame(data)
df['Feature1'] = df['Feature1'].astype(int)  # Error: Cannot convert 'Thirty' to int

print(df)



ValueError: invalid literal for int() with base 10: 'Thirty'

In [3]:
#Ans-Explanation: The invalid string 'Thirty' is replaced with NaN to handle errors.
import pandas as pd

data = {'Feature1': ['10', '20', 'Thirty'],
        'Feature2': [5.5, 6.7, 8.9]}

df = pd.DataFrame(data)
df['Feature1'] = pd.to_numeric(df['Feature1'], errors='coerce')  # Converts invalid values to NaN

print(df)



   Feature1  Feature2
0      10.0       5.5
1      20.0       6.7
2       NaN       8.9


In [4]:
#2.
import pandas as pd

data = {'A': [1, 2, None],  # Missing value
        'B': [4, None, 6]}  # Missing value

df = pd.DataFrame(data)
mean_value = df.mean()
df.fillna(mean_value)  # Error: fillna() does not modify in place

print(df)


     A    B
0  1.0  4.0
1  2.0  NaN
2  NaN  6.0


In [5]:
#Explanation: Missing values are filled with the column mean.
import pandas as pd

data = {'A': [1, 2, None],
        'B': [4, None, 6]}

df = pd.DataFrame(data)
df.fillna(df.mean(), inplace=True)  # Use inplace=True to modify the DataFrame

print(df)


     A    B
0  1.0  4.0
1  2.0  5.0
2  1.5  6.0


In [6]:
#3.from sklearn.linear_model import LinearRegression
import numpy as np

X = np.array([1, 2, 3, 4, 5])  # Error: X should be 2D
y = np.array([2, 4, 6, 8, 10])

model = LinearRegression()
model.fit(X, y)


NameError: name 'LinearRegression' is not defined

In [7]:
#ans
from sklearn.linear_model import LinearRegression
import numpy as np

X = np.array([[1], [2], [3], [4], [5]])  # Reshape X into 2D
y = np.array([2, 4, 6, 8, 10])

model = LinearRegression()
model.fit(X, y)

print(model.predict([[6]]))  # Predict for new value


[12.]


In [8]:
#4.
from sklearn.preprocessing import StandardScaler
import numpy as np

data = np.array([10, 20, 30, 40, 50])  # Error: Should be 2D

scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

print(scaled_data)


ValueError: Expected 2D array, got 1D array instead:
array=[10. 20. 30. 40. 50.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [9]:
from sklearn.preprocessing import StandardScaler
import numpy as np

data = np.array([[10], [20], [30], [40], [50]])  # Convert to 2D array

scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

print(scaled_data)


[[-1.41421356]
 [-0.70710678]
 [ 0.        ]
 [ 0.70710678]
 [ 1.41421356]]


In [10]:
#5.
from sklearn.linear_model import LogisticRegression

X = [[1, 2], [3, 4], [5, 6]]
y = ['yes', 'no', 'yes']  # Error: Labels should be numeric

model = LogisticRegression()
model.fit(X, y)


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [11]:
#5.Explanation: Labels must be 0 and 1 for classification.
from sklearn.linear_model import LogisticRegression

X = [[1, 2], [3, 4], [5, 6]]
y = [1, 0, 1]  # Convert labels to numeric

model = LogisticRegression()
model.fit(X, y)

print(model.predict([[4, 5]]))


[1]


In [12]:
#6.import pandas as pd
from sklearn.preprocessing import OneHotEncoder

df = pd.DataFrame({'Category': ['A', 'B', 'C', 'A']})

encoder = OneHotEncoder()
encoded = encoder.fit_transform(df['Category'])  # Error: Data should be reshaped
print(encoded)


ValueError: Expected a 2-dimensional container but got <class 'pandas.core.series.Series'> instead. Pass a DataFrame containing a single row (i.e. single sample) or a single column (i.e. single feature) instead.

In [13]:
#ans -Explanation: Convert categorical data into binary columns.
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

df = pd.DataFrame({'Category': ['A', 'B', 'C', 'A']})

encoder = OneHotEncoder(sparse=False)
encoded = encoder.fit_transform(df[['Category']])  # Convert to 2D

print(encoded)


TypeError: OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'

In [14]:
#6.
from sklearn.model_selection import train_test_split

X = [[1, 2], [3, 4], [5, 6], [7, 8]]
y = [0, 1, 0]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


ValueError: Found input variables with inconsistent numbers of samples: [4, 3]

In [8]:
#6.ans-No error, and correctly split data.

#Explanation: X and y must have the same number of samples.


from sklearn.model_selection import train_test_split

X = [[1, 2], [3, 4], [5, 6], [7, 8]]
y = [0, 1, 0, 1]  # Now y has the correct length

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print("X_train:", X_train)
print("y_train:", y_train)


X_train: [[7, 8], [3, 4], [5, 6]]
y_train: [1, 1, 0]


In [None]:
#7.from sklearn.linear_model import LogisticRegression

X_train = [[1, 2], [3, 4], [5, 6]]
y_train = ["yes", "no", "yes"]  # Error: LogisticRegression expects numerical labels

model = LogisticRegression()
model.fit(X_train, y_train)


In [9]:
#ans A numerical prediction like [1] or [0].
#Explanation: Logistic Regression requires numeric labels, so we use LabelEncoder().
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

X_train = [[1, 2], [3, 4], [5, 6]]
y_train = ["yes", "no", "yes"]

encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)  # Convert labels to numeric

model = LogisticRegression()
model.fit(X_train, y_train_encoded)

print(model.predict([[2, 3]]))


[1]


In [None]:
#8.
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Creating a dataset with missing values
X_train = np.array([[1, 2], [3, np.nan], [5, 6]])
y_train = np.array([10, 20, 30])

model = LinearRegression()
model.fit(X_train, y_train)  


In [13]:
#ans-Replaced NaN with column mean using SimpleImputer to allow LinearRegression to train and predict correctly.import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer

# Creating a dataset with missing values
X_train = np.array([[1, 2], [3, np.nan], [5, 6]])
y_train = np.array([10, 20, 30])

# Handling missing values by replacing NaN with the column mean
imputer = SimpleImputer(strategy="mean")
X_train_imputed = imputer.fit_transform(X_train)

# Train the model
model = LinearRegression()
model.fit(X_train_imputed, y_train)

# Make a prediction
prediction = model.predict([[3, 4]])
print("Predicted Output:", prediction)


Predicted Output: [20.]


In [None]:
#9.
from sklearn.tree import DecisionTreeClassifier

X_train = [[1, 2], [3, 4], [5, 6]]
y_train = ["spam", "ham", "spam"]  # Error: String labels not allowed

model = DecisionTreeClassifier()
model.fit(X_train, y_train)


In [12]:
#ans-Explanation:Scikit-learn classifiers require numerical labels, so LabelEncoder converts text labels into numerical values.
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)  # Convert to numbers

model.fit(X_train, y_train_encoded)
print(model.predict([[2, 3]]))  # Output: [0] or [1]


[1]


In [None]:
#10.from sklearn.svm import SVC

X_train = [[1, 100], [2, 200], [3, 300]]
y_train = [0, 1, 0]

model = SVC()
model.fit(X_train, y_train)
print(model.predict([[1, 150]]))  # Unreliable output due to large-scale difference


In [2]:
#ans- Explanation:SVM is sensitive to feature scales. StandardScaler normalizes the features, improving model performance.from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

model.fit(X_train_scaled, y_train)

X_test_scaled = scaler.transform([[1, 150]])
print(model.predict(X_test_scaled))  # More reliable output


NameError: name 'StandardScaler' is not defined