In [54]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

df = pd.read_csv('procurement-notice.csv')
df.fillna(df.mean(), inplace=True)

max_vals = np.max(df.values, axis=0)
df.replace([np.inf, -np.inf], max_vals, inplace=True)

# Convert float32 columns to float64
if df.dtypes.values.any() == np.dtype('float32'):
    float_cols = df.select_dtypes(include=[np.float32]).columns
    df[float_cols] = df[float_cols].astype(np.float64)
    
df = df.apply(pd.to_numeric, errors='coerce')
if df.isnull().values.any():
    df.fillna(df.mean(), inplace=True)
if df.isnull().values.any():
    df.fillna(df.mean(), inplace=True)
scaler = MinMaxScaler()
df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
df = pd.read_csv('procurement-notices.csv')
print(df['Bid Description'].unique())
df['Publication Date'] = pd.to_datetime(df['Publication Date'])
df['Deadline Date'] = pd.to_datetime(df['Deadline Date'])

# Extract numerical features from the date columns
df['Publication Year'] = df['Publication Date'].dt.year
df['Publication Month'] = df['Publication Date'].dt.month
df['Publication Day'] = df['Publication Date'].dt.day
df['Deadline Year'] = df['Deadline Date'].dt.year
df['Deadline Month'] = df['Deadline Date'].dt.month
df['Deadline Day'] = df['Deadline Date'].dt.day
df = df.drop(['Publication Date', 'Deadline Date','Bid Description','Country Name'], axis=1)
df = pd.get_dummies(df, columns=['Notice Type', 'Procurement Type', 'Country Code', 'Major Sector'])

# Drop UN WANTED columns
df = df.drop(['ID', 'URL', 'Project ID'], axis=1)

if 'Notice Type_Invitation to bid' not in df.columns:
    y = df['Notice Type_Invitation for Bids']
else:
    y = df['Notice Type_Invitation to bid']   

try:
    df = df.astype(float)
except ValueError as e:
    print(e)
    print("Row with non-numeric value:", df[df.applymap(np.isreal).all(1)==False])

X = df.drop(y.name, axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
rfc = RandomForestClassifier(n_estimators=100, random_state=42)
rfc.fit(X_train, y_train)

accuracy = rfc.score(X_test, y_test)
print(f"Accuracy: {accuracy}")


  df.fillna(df.mean(), inplace=True)


TypeError: '>=' not supported between instances of 'str' and 'float'

In [52]:
print(df['Bid Description'].unique())


['Repair and Renovation (Roofing work) at LRS office'
 'MP04 WBN - 02 : Construction of 01 Gravel Road under Alternet            Connectivity in Bhind District.'
 'Small Contracts Award (DIR,CQS,INDV,SSS)' ...
 'Supply and Delivery of Vehicles to the Auditor General Office'
 'Vuelos fotogrametricos y confección de ortofomapas de los            departamentos de Matagalpa, Boaco y Chontales, Nicaragua.'
 'ADQUISICIÓN DE ONCE (11) EQUIPOS DE COMPUTACIÓNY DOS (2)            ESCÁNERES DE ALTO TRÁFICO']


  df.fillna(df.mean(), inplace=True)


TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

Index(['Publication Year', 'Publication Month', 'Publication Day',
       'Deadline Year', 'Deadline Month', 'Deadline Day',
       'Notice Type_Contract Award', 'Notice Type_General Procurement Notice',
       'Notice Type_Invitation for Prequalification',
       'Notice Type_Request for Expression of Interest',
       ...
       'Country Code_UZ', 'Country Code_VN', 'Country Code_VU',
       'Country Code_WS', 'Country Code_XK', 'Country Code_YF',
       'Country Code_ZA', 'Country Code_ZM', 'Country Code_ZR',
       'Major Sector_Agricultural Extension, Research, and Other Support Activities, Public Administration - Water, Sanitation and Waste Management, Sanitation'],
      dtype='object', length=108)
Index(['Publication Year', 'Publication Month', 'Publication Day',
       'Deadline Year', 'Deadline Month', 'Deadline Day',
       'Notice Type_Contract Award', 'Notice Type_General Procurement Notice',
       'Notice Type_Invitation for Prequalification',
       'Notice Type_Request

In [58]:
print(df.columns)

Index(['ID', 'URL', 'Notice Type', 'Publication Date', 'Project ID',
       'Bid Description', 'Procurement Type', 'Deadline Date', 'Country Code',
       'Country Name', 'Major Sector'],
      dtype='object')
