In [35]:
import pandas as pd
import mysql.connector

# ==========================
# CONFIG
# ==========================

CSV_FILE = "Visadataset.csv"   # name of the CSV file to import
BATCH_SIZE = 500              # number of rows sent per insert batch (a conservative size)

# ==========================
# LOAD CSV
# ==========================

# Read the whole CSV into a DataFrame; the insert code below iterates over it.
print("Reading CSV file...")
data = pd.read_csv(CSV_FILE)

# Report the row count so it can be compared with the insert progress output.
print("Total rows:", len(data))


# ==========================
# INSERT QUERY
# ==========================

# Parameterized statement: the connector binds one value per %s placeholder,
# in the same order as the column list.
sql = """
INSERT INTO visa_data
(case_id, continent, education_of_employee, has_job_experience,
 requires_job_training, no_of_employees, yr_of_estab,
 region_of_employment, prevailing_wage, unit_of_wage,
 full_time_position, case_status)

VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""


# ==========================
# CONVERT TO TUPLES
# ==========================

# Build the parameter tuples before opening the connection, so a conversion
# error (e.g. a non-numeric value) never leaves a connection open.
# itertuples() avoids constructing a Series per row the way iterrows() does.
# The explicit int()/float() casts hand plain Python numbers to the connector.
values = [
    (
        rec.case_id,
        rec.continent,
        rec.education_of_employee,
        rec.has_job_experience,
        rec.requires_job_training,
        int(rec.no_of_employees),
        int(rec.yr_of_estab),
        rec.region_of_employment,
        float(rec.prevailing_wage),
        rec.unit_of_wage,
        rec.full_time_position,
        rec.case_status,
    )
    for rec in data.itertuples(index=False)
]


# ==========================
# CONNECT MYSQL
# ==========================

conn = mysql.connector.connect(
    host="localhost",
    user="root",
    password="",          # left empty when the XAMPP MySQL root user has no password
    database="us_visa_db"
)

print("Connected to MySQL")


# ==========================
# BATCH INSERT
# ==========================

total = len(values)
inserted = 0
cursor = None

try:
    cursor = conn.cursor()

    print("Start inserting...")

    # Send the rows in slices of BATCH_SIZE to keep each statement small.
    for i in range(0, total, BATCH_SIZE):

        batch = values[i:i + BATCH_SIZE]

        cursor.executemany(sql, batch)

        inserted += len(batch)

        print(f"Inserted: {inserted}/{total}")

    # Commit once, after every batch succeeded, so the load is all-or-nothing
    # and a failed run can simply be re-executed without duplicating rows.
    conn.commit()

except Exception:
    # Discard the uncommitted batches, then surface the original error.
    # NOTE(review): rollback only undoes rows on a transactional storage
    # engine (e.g. InnoDB) -- confirm the engine used by visa_data.
    if conn.is_connected():
        conn.rollback()
    raise

finally:
    # ==========================
    # CLOSE
    # ==========================
    # Always release the cursor and the connection, even when the load fails.
    if cursor is not None:
        cursor.close()
    conn.close()

print("✅ All data inserted successfully!")


Reading CSV file...
Total rows: 25480
Connected to MySQL
Start inserting...
Inserted: 500/25480
Inserted: 1000/25480
Inserted: 1500/25480
Inserted: 2000/25480
Inserted: 2500/25480
Inserted: 3000/25480
Inserted: 3500/25480
Inserted: 4000/25480
Inserted: 4500/25480
Inserted: 5000/25480
Inserted: 5500/25480
Inserted: 6000/25480
Inserted: 6500/25480
Inserted: 7000/25480
Inserted: 7500/25480
Inserted: 8000/25480
Inserted: 8500/25480
Inserted: 9000/25480
Inserted: 9500/25480
Inserted: 10000/25480
Inserted: 10500/25480
Inserted: 11000/25480
Inserted: 11500/25480
Inserted: 12000/25480
Inserted: 12500/25480
Inserted: 13000/25480
Inserted: 13500/25480
Inserted: 14000/25480
Inserted: 14500/25480
Inserted: 15000/25480
Inserted: 15500/25480
Inserted: 16000/25480
Inserted: 16500/25480
Inserted: 17000/25480
Inserted: 17500/25480
Inserted: 18000/25480
Inserted: 18500/25480
Inserted: 19000/25480
Inserted: 19500/25480
Inserted: 20000/25480
Inserted: 20500/25480
Inserted: 21000/25480
Inserted: 21500/2548

In [37]:
import pandas as pd
import mysql.connector

# Open a short-lived connection just for reading the table back.
conn = mysql.connector.connect(
    host="localhost",
    user="root",
    password="",
    database="us_visa_db"
)

query = "SELECT * FROM visa_data"

# Close the connection even if the query fails, so a failed read does not
# leave an open connection behind in the kernel.
try:
    df = pd.read_sql(query, conn)
finally:
    conn.close()

# Quick sanity check of what was loaded: dimensions and the first rows.
print(df.shape)
print(df.head())


  df = pd.read_sql(query, conn)


(25480, 13)
   id case_id continent education_of_employee has_job_experience  \
0   1  EZYV01      Asia           High School                  N   
1   2  EZYV02      Asia              Master's                  Y   
2   3  EZYV03      Asia            Bachelor's                  N   
3   4  EZYV04      Asia            Bachelor's                  N   
4   5  EZYV05    Africa              Master's                  Y   

  requires_job_training  no_of_employees  yr_of_estab region_of_employment  \
0                     N            14513         2007                 West   
1                     N             2412         2002            Northeast   
2                     Y            44444         2008                 West   
3                     N               98         1897                 West   
4                     N             1082         2005                South   

   prevailing_wage unit_of_wage full_time_position case_status  
0          592.203         Hour              

In [38]:
# Preview the first rows of the frame read back from visa_data.
df.head()

Unnamed: 0,id,case_id,continent,education_of_employee,has_job_experience,requires_job_training,no_of_employees,yr_of_estab,region_of_employment,prevailing_wage,unit_of_wage,full_time_position,case_status
0,1,EZYV01,Asia,High School,N,N,14513,2007,West,592.203,Hour,Y,Denied
1,2,EZYV02,Asia,Master's,Y,N,2412,2002,Northeast,83425.6,Year,Y,Certified
2,3,EZYV03,Asia,Bachelor's,N,Y,44444,2008,West,122997.0,Year,Y,Denied
3,4,EZYV04,Asia,Bachelor's,N,N,98,1897,West,83434.0,Year,Y,Denied
4,5,EZYV05,Africa,Master's,Y,N,1082,2005,South,149907.0,Year,Y,Certified


In [39]:
# (rows, columns) of the frame read back from visa_data.
df.shape

(25480, 13)

In [40]:
# Column dtypes, non-null counts and memory usage of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25480 entries, 0 to 25479
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     25480 non-null  int64  
 1   case_id                25480 non-null  object 
 2   continent              25480 non-null  object 
 3   education_of_employee  25480 non-null  object 
 4   has_job_experience     25480 non-null  object 
 5   requires_job_training  25480 non-null  object 
 6   no_of_employees        25480 non-null  int64  
 7   yr_of_estab            25480 non-null  int64  
 8   region_of_employment   25480 non-null  object 
 9   prevailing_wage        25480 non-null  float64
 10  unit_of_wage           25480 non-null  object 
 11  full_time_position     25480 non-null  object 
 12  case_status            25480 non-null  object 
dtypes: float64(1), int64(3), object(9)
memory usage: 2.5+ MB


In [41]:
# Count missing values per column.
df.isnull().sum()

id                       0
case_id                  0
continent                0
education_of_employee    0
has_job_experience       0
requires_job_training    0
no_of_employees          0
yr_of_estab              0
region_of_employment     0
prevailing_wage          0
unit_of_wage             0
full_time_position       0
case_status              0
dtype: int64