In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import pandas as pd

# Chrome configuration: open the browser window maximized
options = Options()
options.add_argument("--start-maximized")


# Launch the browser and load the fund search page
driver = webdriver.Chrome(options=options)
driver.get("https://seei.ir/default.aspx?tabid=113")
wait = WebDriverWait(driver, timeout=10)

# One accumulator list per scraped field; the scraping loop appends to them
all_codes, names = [], []
ostan, shahrestan, shahr, roosta = [], [], [], []
address, code_tel = [], []
reg_num, nationalcode = [], []

def get_text_or_null(tag):
    """Return the stripped text of *tag*, or "NULL" if it is missing or blank."""
    if not tag:
        return "NULL"
    cleaned = tag.text.strip()
    return cleaned or "NULL"

def _current_page_number(d):
    """Return the pager's current page number, or None while it is unavailable.

    Used as a wait predicate: during a grid postback the pager anchor can be
    missing for a moment, and raising from the predicate would abort the wait
    (WebDriverWait only ignores NoSuchElementException by default).
    """
    tag = BeautifulSoup(d.page_source, 'html.parser').find('a', class_='rgCurrentPage')
    label = tag.text.strip() if tag is not None else ""
    return int(label) if label.isdigit() else None


# Walk every result page: for each licensed fund row record its code, open the
# detail modal, read the nine detail fields, then advance to the next page.
while True:
    wait.until(EC.presence_of_element_located((By.ID, 'ctl13_ctl03_ctl00_SearchrgFunds_ctl00')))

    # Read the page source and parse with BeautifulSoup
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    table = soup.find('table', {'id': 'ctl13_ctl03_ctl00_SearchrgFunds_ctl00'})
    rows = table.find_all('tr', class_='FundStatusLicensed')

    # Loop through each visible fund row
    for i, row in enumerate(rows):
        try:
            cols = row.find_all('td')
            if cols:
                text = cols[0].text.strip()
                if text.isdigit():
                    # NOTE(review): the code is stored before the detail scrape,
                    # so all_codes can grow longer than the detail lists when a
                    # row fails further down.
                    all_codes.append(text)
                    print("Fund Code:", text)

            # Find the detail button by its ID pattern (ctl04, ctl06, ...).
            # NOTE(review): assumes the i-th licensed row is the i-th grid row.
            btn_id = f'ctl13_ctl03_ctl00_Search__rgFunds_ctl00_ctl{2 * i + 4:02d}__imgbDetail'
            print(f'Button ID: {btn_id}')
            detail_button = driver.find_element(By.ID, btn_id)

            # Scroll to the button and click it
            driver.execute_script("arguments[0].scrollIntoView(true);", detail_button)
            time.sleep(0.5)
            detail_button.click()

            # Give the modal time to render, then parse its detail items
            time.sleep(1.5)
            modal_soup = BeautifulSoup(driver.page_source, 'html.parser')
            detail_items = modal_soup.find_all("div", class_=["col-6 col-md-6 detailItem", "col-12 col-md-6 detailItem"])

            # The value is the second <span> of each item; absent values become "NULL"
            row_data = [
                get_text_or_null(spans[1]) if len(spans) >= 2 else "NULL"
                for spans in (div.find_all("span") for div in detail_items)
            ]
            # Pad so the nine positional fields below always exist
            row_data.extend(["NULL"] * (9 - len(row_data)))

            names.append(row_data[0])
            ostan.append(row_data[1])
            shahrestan.append(row_data[2])
            shahr.append(row_data[3])
            roosta.append(row_data[4])
            address.append(row_data[5])
            code_tel.append(str(row_data[6]))
            reg_num.append(row_data[7])
            nationalcode.append(row_data[8])

            print("✅ Detail modal opened")
            time.sleep(1)

            # Close the modal using JavaScript
            driver.execute_script("$('.modal.fade.show').modal('hide');")
            print("🔄 Modal closed")
            time.sleep(0.5)

        except Exception as e:
            # Best effort: report the failed row and move on to the next one
            print(f'❌ Error at row {i}: {e}')

    # Try clicking the next page button
    try:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        pager_div = soup.find('div', {'class': 'rgWrap rgNumPart'})
        if pager_div is None:
            print("⚠️ Cannot find pager.")
            break

        current_page_tag = pager_div.find('a', class_='rgCurrentPage')
        if current_page_tag is None:
            print("⚠️ Cannot find current page.")
            break

        current_page = int(current_page_tag.text.strip())
        all_pages = [int(a.text.strip()) for a in pager_div.find_all('a') if a.text.strip().isdigit()]
        print(f"📄 Pages shown: {all_pages} / Current: {current_page} / Max on screen: {max(all_pages)}")

        # Find the "Next" button
        next_button = driver.find_element(By.XPATH,
            '/html/body/form/div[4]/div[8]/div/div/div[2]/div/div/div[3]/div/div/table/tfoot/tr/td/table/tbody/tr/td/div[3]/input[1]'
        )

        if not next_button.is_enabled():
            print("✅ Reached the last page (Next button disabled).")
            break

        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
        time.sleep(0.5)

        old_page = current_page
        next_button.click()

        driver.execute_script("window.scrollTo(0, 0);")
        # Poll until the pager shows a different page; a missing pager means "not yet"
        WebDriverWait(driver, 10).until(
            lambda d: _current_page_number(d) not in (None, old_page)
        )
        time.sleep(1)

    except Exception as e:
        print("📛 Error or maybe last page:", e)
        break

# Print final result
# print("🎯 Final fund codes:", all_codes)
# print("🎯 Final fund name:", names)





In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import pandas as pd

# Headless Chrome settings
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
# Chrome documents this switch as --window-size=width,height; an "x" separator
# is not reliably parsed and can leave the window at its default size.
options.add_argument("--window-size=1920,1080")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# Start WebDriver and load the fund search page
driver = webdriver.Chrome(options=options)
driver.get("https://seei.ir/default.aspx?tabid=113")
wait = WebDriverWait(driver, 10)

# Feature lists: one accumulator per scraped field, filled by the scraping loop
all_codes = []
names = []
ostan = []
shahrestan = []
shahr = []
roosta = []
address = []
code_tel = []
reg_num = []
nationalcode = []

def get_text_or_null(tag):
    """Return the stripped text of *tag*, or "NULL" if it is missing or blank."""
    if not tag:
        return "NULL"
    cleaned = tag.text.strip()
    return cleaned or "NULL"

def _current_page_number(d):
    """Return the pager's current page number, or None while it is unavailable.

    Used as a wait predicate: during a grid postback the pager anchor can be
    missing for a moment, and raising from the predicate would abort the wait
    (WebDriverWait only ignores NoSuchElementException by default).
    """
    tag = BeautifulSoup(d.page_source, 'html.parser').find('a', class_='rgCurrentPage')
    label = tag.text.strip() if tag is not None else ""
    return int(label) if label.isdigit() else None


# Walk every result page: record each licensed fund's code, open its detail
# modal, read the nine detail fields, then advance to the next page.
while True:
    wait.until(EC.presence_of_element_located((By.ID, 'ctl13_ctl03_ctl00_SearchrgFunds_ctl00')))
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    table = soup.find('table', {'id': 'ctl13_ctl03_ctl00_SearchrgFunds_ctl00'})
    rows = table.find_all('tr', class_='FundStatusLicensed')

    for i, row in enumerate(rows):
        try:
            cols = row.find_all('td')
            if cols:
                text = cols[0].text.strip()
                if text.isdigit():
                    # NOTE(review): stored before the detail scrape, so
                    # all_codes can outgrow the detail lists when a row fails.
                    all_codes.append(text)

            # Detail-button IDs follow ctl04, ctl06, ... in row order.
            # NOTE(review): assumes the i-th licensed row is the i-th grid row.
            btn_id = f'ctl13_ctl03_ctl00_Search__rgFunds_ctl00_ctl{2 * i + 4:02d}__imgbDetail'
            detail_button = driver.find_element(By.ID, btn_id)

            driver.execute_script("arguments[0].scrollIntoView(true);", detail_button)
            time.sleep(0.5)
            detail_button.click()

            # Give the modal time to render, then parse its detail items
            time.sleep(1.5)
            modal_soup = BeautifulSoup(driver.page_source, 'html.parser')
            detail_items = modal_soup.find_all("div", class_=["col-6 col-md-6 detailItem", "col-12 col-md-6 detailItem"])

            # The value is the second <span> of each item; absent values become "NULL"
            row_data = [
                get_text_or_null(spans[1]) if len(spans) >= 2 else "NULL"
                for spans in (div.find_all("span") for div in detail_items)
            ]
            # Pad so the nine positional fields below always exist
            row_data.extend(["NULL"] * (9 - len(row_data)))

            names.append(row_data[0])
            ostan.append(row_data[1])
            shahrestan.append(row_data[2])
            shahr.append(row_data[3])
            roosta.append(row_data[4])
            address.append(row_data[5])
            code_tel.append(row_data[6])
            reg_num.append(row_data[7])
            nationalcode.append(row_data[8])

            # Close the modal before moving to the next row
            driver.execute_script("$('.modal.fade.show').modal('hide');")
            time.sleep(0.5)

        except Exception as e:
            # `except Exception` rather than a bare except, so an interrupt
            # (Ctrl-C / kernel stop) still ends the run; failed rows are reported.
            print(f'❌ Error at row {i}: {e}')

    try:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        pager_div = soup.find('div', {'class': 'rgWrap rgNumPart'})
        if pager_div is None:
            break

        current_page_tag = pager_div.find('a', class_='rgCurrentPage')
        if current_page_tag is None:
            break

        current_page = int(current_page_tag.text.strip())

        next_button = driver.find_element(By.XPATH,
            '/html/body/form/div[4]/div[8]/div/div/div[2]/div/div/div[3]/div/div/table/tfoot/tr/td/table/tbody/tr/td/div[3]/input[1]'
        )

        if not next_button.is_enabled():
            break

        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
        time.sleep(0.5)

        old_page = current_page
        next_button.click()

        driver.execute_script("window.scrollTo(0, 0);")
        # Poll until the pager shows a different page; a missing pager means "not yet"
        WebDriverWait(driver, 10).until(
            lambda d: _current_page_number(d) not in (None, old_page)
        )
        time.sleep(0.5)

    except Exception as e:
        print("📛 Error or maybe last page:", e)
        break



# Final code for all pages and tables (with retries for when the Internet connection is lost)

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import pandas as pd

# Headless Chrome settings
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
# Chrome documents this switch as --window-size=width,height; an "x" separator
# is not reliably parsed and can leave the window at its default size.
options.add_argument("--window-size=1920,1080")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# Start WebDriver and load the fund search page
driver = webdriver.Chrome(options=options)
driver.get("https://seei.ir/default.aspx?tabid=113")
wait = WebDriverWait(driver, 15)

# Feature lists: one accumulator per scraped field, filled by the scraping loop
all_codes = []
names = []
ostan = []
shahrestan = []
shahr = []
roosta = []
address = []
code_tel = []
reg_num = []
nationalcode = []

def get_text_or_null(tag):
    """Return the stripped text of *tag*, or "NULL" if it is missing or blank."""
    if not tag:
        return "NULL"
    cleaned = tag.text.strip()
    return cleaned or "NULL"

def _current_page_number(d):
    """Return the pager's current page number, or None while it is unavailable.

    Used as a wait predicate: during a grid postback the pager anchor can be
    missing for a moment, and raising from the predicate would abort the wait
    (WebDriverWait only ignores NoSuchElementException by default).
    """
    tag = BeautifulSoup(d.page_source, 'html.parser').find('a', class_='rgCurrentPage')
    label = tag.text.strip() if tag is not None else ""
    return int(label) if label.isdigit() else None


# Walk every result page: record each licensed fund's code, open its detail
# modal, read the nine detail fields, then advance to the next page.
while True:
    # Load the table; on failure refresh once and try again before giving up
    for attempt in range(2):
        try:
            wait.until(EC.presence_of_element_located((By.ID, 'ctl13_ctl03_ctl00_SearchrgFunds_ctl00')))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            table = soup.find('table', {'id': 'ctl13_ctl03_ctl00_SearchrgFunds_ctl00'})
            rows = table.find_all('tr', class_='FundStatusLicensed')
            if rows:
                break
        except Exception as err:
            if attempt == 1:
                driver.quit()
                # Chain the cause so the real failure stays visible in the traceback
                raise RuntimeError("Table not loaded after retries.") from err
            driver.refresh()
            time.sleep(2)

    for i, row in enumerate(rows):
        try:
            cols = row.find_all('td')
            if cols:
                text = cols[0].text.strip()
                if text.isdigit():
                    # NOTE(review): stored before the detail scrape, so
                    # all_codes can outgrow the detail lists when a row fails.
                    all_codes.append(text)

            # Detail-button IDs follow ctl04, ctl06, ... in row order.
            # NOTE(review): assumes the i-th licensed row is the i-th grid row.
            btn_id = f'ctl13_ctl03_ctl00_Search__rgFunds_ctl00_ctl{2 * i + 4:02d}__imgbDetail'
            detail_button = driver.find_element(By.ID, btn_id)
            driver.execute_script("arguments[0].scrollIntoView(true);", detail_button)
            time.sleep(0.5)
            detail_button.click()

            # Give the modal time to render, then parse its detail items
            time.sleep(1.5)
            modal_soup = BeautifulSoup(driver.page_source, 'html.parser')
            detail_items = modal_soup.find_all("div", class_=["col-6 col-md-6 detailItem", "col-12 col-md-6 detailItem"])

            # The value is the second <span> of each item; absent values become "NULL"
            row_data = [
                get_text_or_null(spans[1]) if len(spans) >= 2 else "NULL"
                for spans in (div.find_all("span") for div in detail_items)
            ]
            # Pad so the nine positional fields below always exist
            row_data.extend(["NULL"] * (9 - len(row_data)))

            names.append(row_data[0])
            ostan.append(row_data[1])
            shahrestan.append(row_data[2])
            shahr.append(row_data[3])
            roosta.append(row_data[4])
            address.append(row_data[5])
            code_tel.append(row_data[6])
            reg_num.append(row_data[7])
            nationalcode.append(row_data[8])

            # Close the modal before moving to the next row
            driver.execute_script("$('.modal.fade.show').modal('hide');")
            time.sleep(0.5)

        except Exception as e:
            # `except Exception` rather than a bare except, so an interrupt
            # (Ctrl-C / kernel stop) still ends the run; failed rows are reported.
            print(f'❌ Error at row {i}: {e}')

    try:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        pager_div = soup.find('div', {'class': 'rgWrap rgNumPart'})
        if pager_div is None:
            break

        current_page_tag = pager_div.find('a', class_='rgCurrentPage')
        if current_page_tag is None:
            break

        current_page = int(current_page_tag.text.strip())

        next_button = driver.find_element(By.XPATH,
            '/html/body/form/div[4]/div[8]/div/div/div[2]/div/div/div[3]/div/div/table/tfoot/tr/td/table/tbody/tr/td/div[3]/input[1]'
        )

        if not next_button.is_enabled():
            break

        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
        time.sleep(0.5)

        old_page = current_page
        next_button.click()

        driver.execute_script("window.scrollTo(0, 0);")
        # Poll until the pager shows a different page; a missing pager means "not yet"
        WebDriverWait(driver, 15).until(
            lambda d: _current_page_number(d) not in (None, old_page)
        )
        time.sleep(1)

    except Exception as e:
        print("📛 Error or maybe last page:", e)
        break


In [None]:
# Assemble the scraped lists into a DataFrame, one column per list.
# "Fund Code" (all_codes) is deliberately left out — presumably because its
# length can differ from the detail lists; verify before re-enabling it.
df = pd.DataFrame.from_dict(
    {
        "Name": names,
        "Ostan": ostan,
        "Shahrestan": shahrestan,
        "Shahr": shahr,
        "Roosta": roosta,
        "Address": address,
        "Phone Code": code_tel,
        "Reg Number": reg_num,
        "National Code": nationalcode,
    }
)


# Preview the first five rows
print(df.head(5))

In [None]:
# Summary statistics of df, transposed so each DataFrame column becomes a row.
df.describe().T

In [None]:
# Total number of elements in df (rows × columns).
df.size

In [None]:
# (rows, columns) of the scraped table.
df.shape

In [None]:
# Number of rows in df, i.e. funds whose details were stored.
df.shape[0]

In [None]:
# Number of fund codes collected; compare with the DataFrame row count to spot
# rows whose detail scrape did not complete.
len(all_codes)

In [None]:
# Save without the index column; utf-8-sig writes a BOM at the start of the file.
df.to_csv("funds_data.csv", index=False, encoding='utf-8-sig')


In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import pandas as pd

# Accumulator for the fund codes found on every page
all_codes = []

# Chrome configuration: open the browser window maximized
options = Options()
options.add_argument("--start-maximized")

# Launch Chrome, load the fund search page and prepare a 15-second explicit wait
driver = webdriver.Chrome(options=options)
driver.get("https://seei.ir/default.aspx?tabid=113")
wait = WebDriverWait(driver, timeout=15)

from selenium.common.exceptions import TimeoutException


def _current_page_number(d):
    """Return the pager's current page number, or None while it is unavailable.

    Used as a wait predicate: during a grid postback the pager anchor can be
    missing for a moment, and raising from the predicate would abort the wait
    (WebDriverWait only ignores NoSuchElementException by default).
    """
    tag = BeautifulSoup(d.page_source, 'html.parser').find('a', class_='rgCurrentPage')
    label = tag.text.strip() if tag is not None else ""
    return int(label) if label.isdigit() else None


# Collect the fund code of every licensed row on every result page.
while True:
    # Wait for the main table to load
    wait.until(EC.presence_of_element_located((By.ID, 'ctl13_ctl03_ctl00_SearchrgFunds_ctl00')))

    # Parse page with BeautifulSoup
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    table = soup.find('table', {'id': 'ctl13_ctl03_ctl00_SearchrgFunds_ctl00'})
    rows = table.find_all('tr', class_='FundStatusLicensed')

    # The fund code is the first cell of the row; keep it only if purely numeric
    for row in rows:
        cols = row.find_all('td')
        if cols:
            text = cols[0].text.strip()
            if text.isdigit():
                all_codes.append(text)

    # After collecting codes, try to go to the next page
    try:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        pager_div = soup.find('div', {'class': 'rgWrap rgNumPart'})
        if pager_div is None:
            break

        current_page_tag = pager_div.find('a', class_='rgCurrentPage')
        if current_page_tag is None:
            break

        current_page = int(current_page_tag.text.strip())

        # Try to find Next button
        next_button = driver.find_element(By.XPATH,
            '/html/body/form/div[4]/div[8]/div/div/div[2]/div/div/div[3]/div/div/table/tfoot/tr/td/table/tbody/tr/td/div[3]/input[1]'
        )

        # Scroll and click next
        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
        time.sleep(0.5)
        old_page = current_page
        next_button.click()

        # Wait for the page number to change; no change within the timeout is
        # treated as having reached the last page.
        try:
            WebDriverWait(driver, 15).until(
                lambda d: _current_page_number(d) not in (None, old_page)
            )
        except TimeoutException:
            break

        time.sleep(1)

    except Exception as e:
        # `except Exception` rather than a bare except, so an interrupt
        # (Ctrl-C / kernel stop) still ends the run.
        print("📛 Error or maybe last page:", e)
        break

# Print result
print("✅ Total fund codes collected:", len(all_codes))
print(all_codes)


✅ Total fund codes collected: 941
['1137002', '1137003', '1137005', '1137006', '1137007', '1137008', '1137009', '1137010', '1137013', '1137014', '1137015', '1137017', '1137018', '1137019', '1137020', '1137021', '1137023', '1137024', '1137025', '1137026', '1137028', '1137030', '1137031', '1137033', '1137034', '1137035', '1137036', '1137037', '1137038', '1137039', '1137040', '1137041', '1137042', '1137043', '1137044', '1137045', '1137046', '1137047', '1137048', '1137050', '1137051', '1137052', '1137055', '1137056', '1137057', '1137058', '1137059', '1137060', '1137061', '1137062', '1137063', '1137064', '1137065', '1137066', '1137067', '1137068', '1137069', '1137070', '1137071', '1137073', '1137074', '1137075', '1137076', '1137077', '1137078', '1137081', '1137082', '1137084', '1137085', '1137086', '1137087', '1137088', '1137089', '1137090', '1137091', '1335001', '1335002', '1335003', '1335004', '1335005', '1335006', '1335007', '1335008', '1335009', '1335010', '1335011', '1335012', '1335013

In [2]:
# Number of fund codes collected across all pages.
len(all_codes)

941

In [4]:
# Number of distinct codes; equal to len(all_codes) when nothing was collected twice.
len(set(all_codes))

941

In [5]:
import pandas as pd

# Single-column table of the collected fund codes
# (the Persian column header means "fund codes").
df_1 = pd.DataFrame(data={'کدهای صندوق': all_codes})

df_1.head(5)

Unnamed: 0,کدهای صندوق
0,1137002
1,1137003
2,1137005
3,1137006
4,1137007


In [7]:
# Save the codes without the index column; utf-8-sig writes a BOM at the start of the file.
df_1.to_csv('کدهای صندوق.csv', index=False, encoding='utf-8-sig')

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import pandas as pd

# Headless Chrome settings
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
# Chrome documents this switch as --window-size=width,height; an "x" separator
# is not reliably parsed and can leave the window at its default size.
options.add_argument("--window-size=1920,1080")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# Start WebDriver and load the fund search page
driver = webdriver.Chrome(options=options)
driver.get("https://seei.ir/default.aspx?tabid=113")
wait = WebDriverWait(driver, 15)

# Feature lists: one accumulator per scraped field, filled by the scraping loop
names = []
province = []
country = []
city = []
Village = []
address = []
code_tel = []
reg_num = []
national_code = []

def get_text_or_null(tag):
    """Return the stripped text of *tag*, or "NULL" if it is missing or blank."""
    if not tag:
        return "NULL"
    cleaned = tag.text.strip()
    return cleaned or "NULL"

from selenium.common.exceptions import TimeoutException


def _current_page_number(d):
    """Return the pager's current page number, or None while it is unavailable.

    Used as a wait predicate: during a grid postback the pager anchor can be
    missing for a moment, and raising from the predicate would abort the wait
    (WebDriverWait only ignores NoSuchElementException by default).
    """
    tag = BeautifulSoup(d.page_source, 'html.parser').find('a', class_='rgCurrentPage')
    label = tag.text.strip() if tag is not None else ""
    return int(label) if label.isdigit() else None


# Walk every result page: open each licensed fund's detail modal, read the
# nine detail fields, then advance to the next page.
while True:
    # Load the table; on failure refresh once and try again before giving up
    for attempt in range(2):
        try:
            wait.until(EC.presence_of_element_located((By.ID, 'ctl13_ctl03_ctl00_SearchrgFunds_ctl00')))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            table = soup.find('table', {'id': 'ctl13_ctl03_ctl00_SearchrgFunds_ctl00'})
            rows = table.find_all('tr', class_='FundStatusLicensed')
            if rows:
                break
        except Exception as err:
            if attempt == 1:
                driver.quit()
                # Chain the cause so the real failure stays visible in the traceback
                raise RuntimeError("Table not loaded after retries.") from err
            driver.refresh()
            time.sleep(2)

    for i, row in enumerate(rows):
        try:
            # Detail-button IDs follow ctl04, ctl06, ... in row order.
            # NOTE(review): assumes the i-th licensed row is the i-th grid row.
            btn_id = f'ctl13_ctl03_ctl00_Search__rgFunds_ctl00_ctl{2 * i + 4:02d}__imgbDetail'
            detail_button = driver.find_element(By.ID, btn_id)
            driver.execute_script("arguments[0].scrollIntoView(true);", detail_button)
            time.sleep(0.5)
            detail_button.click()

            # Give the modal time to render, then parse its detail items
            time.sleep(1)
            modal_soup = BeautifulSoup(driver.page_source, 'html.parser')
            detail_items = modal_soup.find_all("div", class_=["col-6 col-md-6 detailItem", "col-12 col-md-6 detailItem"])

            # The value is the second <span> of each item; absent values become "NULL"
            row_data = [
                get_text_or_null(spans[1]) if len(spans) >= 2 else "NULL"
                for spans in (div.find_all("span") for div in detail_items)
            ]
            # Pad so the nine positional fields below always exist
            row_data.extend(["NULL"] * (9 - len(row_data)))

            names.append(row_data[0])
            province.append(row_data[1])
            country.append(row_data[2])
            city.append(row_data[3])
            Village.append(row_data[4])
            address.append(row_data[5])
            code_tel.append(str(row_data[6]))
            reg_num.append(row_data[7])
            national_code.append(row_data[8])

            # Close the modal before moving to the next row
            driver.execute_script("$('.modal.fade.show').modal('hide');")
            time.sleep(0.5)

        except Exception as e:
            # `except Exception` rather than a bare except, so an interrupt
            # (Ctrl-C / kernel stop) still ends the run; failed rows are reported.
            print(f'❌ Error at row {i}: {e}')

    # After scraping the rows, try to go to the next page
    try:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        pager_div = soup.find('div', {'class': 'rgWrap rgNumPart'})
        if pager_div is None:
            break

        current_page_tag = pager_div.find('a', class_='rgCurrentPage')
        if current_page_tag is None:
            break

        current_page = int(current_page_tag.text.strip())

        # Try to find Next button
        next_button = driver.find_element(By.XPATH,
            '/html/body/form/div[4]/div[8]/div/div/div[2]/div/div/div[3]/div/div/table/tfoot/tr/td/table/tbody/tr/td/div[3]/input[1]'
        )

        # Scroll and click next
        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
        time.sleep(0.5)
        old_page = current_page
        next_button.click()

        # Wait for the page number to change; no change within the timeout is
        # treated as having reached the last page.
        try:
            WebDriverWait(driver, 15).until(
                lambda d: _current_page_number(d) not in (None, old_page)
            )
        except TimeoutException:
            break

        time.sleep(1)

    except Exception as e:
        print("📛 Error or maybe last page:", e)
        break


In [13]:
# Display the fund names scraped so far.
names

['ولی عصر(عج)',
 'ولی عصر(عج)',
 'ثامن الائمه(ع)',
 'حضرت ابوالفضل(ع)',
 'امام رضا(ع)',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 'ثامن الائمه(ع)',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 'امام رضا(ع)',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 'موسی بن جعفر(ع)',
 'ولی عصر(عج)',
 'امام هادی(ع)',
 'غدیر',
 'امام علی(ع)',
 'ولی عصر(عج)',
 'امام حسین(ع)',
 'محمد رسول اله(ص)',
 'امام حسن عسگری(ع)',
 'امیرالمومنین(ع)',
 'امام حسن مجتبی(ع)',
 'شهدای قرآن',
 'شهدای درزیکلا شیخ',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 'امیرالمومنین(ع)',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 'امام جعفرصادق(ع)',
 'ولی عصر(عج)',
 'امام جعفرصادق(ع)',
 'امام سجاد(ع)',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 'ایثار امام زمان(عج)',
 'امام جعفرصادق(ع)',
 'ولی عصر(عج)',
 'امیر المومنین(ع)',
 'المهدی(عج)',
 'ولی عصر(عج)',
 'محمد رسول اله(ص)',
 'قدس',
 'الفتح',
 'ولی عصر(عج)',
 'امام حسین(ع)',
 'نصر',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 'فاطمه زهرا(س)',
 'ولی عصر(عج)',
 'ولی عصر(عج)',
 '

In [14]:
# Number of funds whose details were scraped.
len(names)

203

In [3]:
import pandas as pd

# Build the table from the lists filled by the detail-scraping cell above.
# That cell stores its results in `province`, `country`, `city`, `Village`
# and `national_code`; referencing the older `ostan` / `shahrestan` / `shahr`
# / `roosta` / `nationalcode` names would either fail or mix in data from a
# different scraping run. Column labels are kept unchanged.
# "Fund Code" (all_codes) is intentionally left out: that cell does not
# collect fund codes.
df = pd.DataFrame({
    "Name": names,
    "Ostan": province,
    "Shahrestan": country,
    "Shahr": city,
    "Roosta": Village,
    "Address": address,
    "Phone Code": code_tel,
    "Reg Number": reg_num,
    "National Code": national_code
})
print(df.head())

NameError: name 'names' is not defined

In [None]:
# Preview the first rows, then report the frame's dimensions and its row count
print(f"5 top Records:\n  {df.head()}")

print(f"shape of my DataFrame:\n  {df.shape}")
print(len(df.index))

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import pandas as pd

# Headless Chrome settings
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
# Chrome documents this switch as --window-size=width,height; an "x" separator
# is not reliably parsed and can leave the window at its default size.
options.add_argument("--window-size=1920,1080")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# Start WebDriver and load the fund search page
driver = webdriver.Chrome(options=options)
driver.get("https://seei.ir/default.aspx?tabid=113")
wait = WebDriverWait(driver, 15)

# Feature lists: one accumulator per scraped field, filled by the scraping loop
names = []
province = []
country = []
city = []
Village = []
address = []
code_tel = []
tel_alt = []  # new column for the second part of the phone number
reg_num = []
national_code = []

def get_text_or_null(tag):
    """Return the stripped text of *tag*, or "NULL" if it is missing or blank."""
    if not tag:
        return "NULL"
    cleaned = tag.text.strip()
    return cleaned or "NULL"

from selenium.common.exceptions import TimeoutException


def _current_page_number(d):
    """Return the pager's current page number, or None while it is unavailable.

    Used as a wait predicate: during a grid postback the pager anchor can be
    missing for a moment, and raising from the predicate would abort the wait
    (WebDriverWait only ignores NoSuchElementException by default).
    """
    tag = BeautifulSoup(d.page_source, 'html.parser').find('a', class_='rgCurrentPage')
    label = tag.text.strip() if tag is not None else ""
    return int(label) if label.isdigit() else None


# Walk every result page: open each licensed fund's detail modal, read the
# nine detail fields (splitting the phone field in two), then advance to the
# next page.
while True:
    # Load the table; on failure refresh once and try again before giving up
    for attempt in range(2):
        try:
            wait.until(EC.presence_of_element_located((By.ID, 'ctl13_ctl03_ctl00_SearchrgFunds_ctl00')))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            table = soup.find('table', {'id': 'ctl13_ctl03_ctl00_SearchrgFunds_ctl00'})
            rows = table.find_all('tr', class_='FundStatusLicensed')
            if rows:
                break
        except Exception as err:
            if attempt == 1:
                driver.quit()
                # Chain the cause so the real failure stays visible in the traceback
                raise RuntimeError("Table not loaded after retries.") from err
            driver.refresh()
            time.sleep(2)

    for i, row in enumerate(rows):
        try:
            # Detail-button IDs follow ctl04, ctl06, ... in row order.
            # NOTE(review): assumes the i-th licensed row is the i-th grid row.
            btn_id = f'ctl13_ctl03_ctl00_Search__rgFunds_ctl00_ctl{2 * i + 4:02d}__imgbDetail'
            detail_button = driver.find_element(By.ID, btn_id)
            driver.execute_script("arguments[0].scrollIntoView(true);", detail_button)
            time.sleep(0.5)
            detail_button.click()

            # Give the modal time to render, then parse its detail items
            time.sleep(1)
            modal_soup = BeautifulSoup(driver.page_source, 'html.parser')
            detail_items = modal_soup.find_all("div", class_=["col-6 col-md-6 detailItem", "col-12 col-md-6 detailItem"])

            # The value is the second <span> of each item; absent values become "NULL"
            row_data = [
                get_text_or_null(spans[1]) if len(spans) >= 2 else "NULL"
                for spans in (div.find_all("span") for div in detail_items)
            ]
            # Pad so the nine positional fields below always exist
            row_data.extend(["NULL"] * (9 - len(row_data)))

            names.append(row_data[0])
            province.append(row_data[1])
            country.append(row_data[2])
            city.append(row_data[3])
            Village.append(row_data[4])
            address.append(row_data[5])

            # Phone field: exactly one "-" means two numbers, stored separately;
            # anything else is kept whole with "NULL" as the second number.
            phone_field = str(row_data[6])
            tel_parts = phone_field.split('-')
            if len(tel_parts) == 2:
                code_tel.append(tel_parts[0].strip())
                tel_alt.append(tel_parts[1].strip())
            else:
                code_tel.append(phone_field.strip())
                tel_alt.append("NULL")

            reg_num.append(row_data[7])
            national_code.append(row_data[8])

            # Close the modal before moving to the next row
            driver.execute_script("$('.modal.fade.show').modal('hide');")
            time.sleep(0.5)

        except Exception as e:
            # `except Exception` rather than a bare except, so an interrupt
            # (Ctrl-C / kernel stop) still ends the run; failed rows are reported.
            print(f'❌ Error at row {i}: {e}')

    # try to go to the next page
    try:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        pager_div = soup.find('div', {'class': 'rgWrap rgNumPart'})
        if pager_div is None:
            break

        current_page_tag = pager_div.find('a', class_='rgCurrentPage')
        if current_page_tag is None:
            break

        current_page = int(current_page_tag.text.strip())

        # Try to find Next button
        next_button = driver.find_element(By.XPATH,
            '/html/body/form/div[4]/div[8]/div/div/div[2]/div/div/div[3]/div/div/table/tfoot/tr/td/table/tbody/tr/td/div[3]/input[1]'
        )

        # Scroll and click next
        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
        time.sleep(0.5)
        old_page = current_page
        next_button.click()

        # Wait for the page number to change; no change within the timeout is
        # treated as having reached the last page.
        try:
            WebDriverWait(driver, 15).until(
                lambda d: _current_page_number(d) not in (None, old_page)
            )
        except TimeoutException:
            break

        time.sleep(1)

    except Exception as e:
        print("📛 Error or maybe last page:", e)
        break


# Assemble the scraped lists into a DataFrame, one column per list.
# NOTE(review): the "Country" values in the sample output look like county
# names (e.g. بابل), so the label is probably meant to be "County"; likewise
# "Phone Code" appears to hold a full phone number. Confirm before renaming,
# since that changes the CSV header.
df = pd.DataFrame.from_dict(
    {
        "Name": names,
        "Province": province,
        "Country": country,
        "City": city,
        "Village": Village,
        "Address": address,
        "Phone Code": code_tel,
        "Phone Code 2": tel_alt,
        "Reg Number": reg_num,
        "National Code": national_code,
    }
)

# Preview the first five rows
print(df.head(5))

# Save without the index column; utf-8-sig writes a BOM at the start of the file
df.to_csv("funds_data.csv", encoding='utf-8-sig', index=False)


               Name  Province Country  City            Village  \
0       ولی عصر(عج)  مازندران    بابل  NULL  امین آبادو کروکلا   
1       ولی عصر(عج)  مازندران    بابل  NULL   سیاهکلامحله شرقی   
2    ثامن الائمه(ع)  مازندران    بابل  NULL           علی آباد   
3  حضرت ابوالفضل(ع)  مازندران    بابل  NULL             شوبکلا   
4       امام رضا(ع)  مازندران    بابل  NULL    درزیکلا نصیرائی   

                                             Address   Phone Code  \
0  10 کیلومتری بابل،جاده جدید امل، روستای امین اب...  01132114542   
1            بابل،جاده گنج افروز روستای سیاهکلا محله  01132230088   
2                   جاده آمل به بابل-16 کیلومتری آمل  01132642023   
3                        جاده قدیم آمل-روستای شوبکلا  01132422227   
4                           جاده شهید صالحی پلاک 394  01132143106   

  Phone Code 2 Reg Number National Code  
0         NULL         28   10760007282  
1  01132234003         16   10862138889  
2         NULL         25   10760006623  
3         NULL      