In [28]:
import pandas as pd

# 创建一个简单的DataFrame
df = pd.DataFrame({
    "A": [1, 2, 3],
    "B": [4, 5, 6],
}, index=['one', 'two', 'three'])

print(df)
# 使用.loc选择特定行
selected_rows = df.loc[['one', 'two']]
print(type(selected_rows), selected_rows)

selected_rows = df.loc['one']
print(type(selected_rows))
print( selected_rows)
# 使用.loc选择特定列
selected_columns = df.loc[:, ['A', 'B']]
print(type(selected_columns), selected_columns)

# 使用.loc选择特定行列
selected_rows_and_columns = df.loc[['one', 'two'], 'A']


# 使用.iloc选择特定行
print('------iloc[[0,2]]----')
selected_rows = df.iloc[[0, 2]]  # 选择索引为0和2的行
print(type(selected_rows))
print( selected_rows)

# 使用.iloc选择特定列
print('------df.iloc[:, [1, 1]]----')
selected_columns = df.iloc[:, [0, 1, 1]]  # 选择第0列和第2列
print(type(selected_columns))
print( selected_columns)
# 使用.iloc选择特定列
print('------df.iloc[:, 0]----')
selected_columns = df.iloc[:, 1]  # 选择第0列和第2列
print(type(selected_columns))
print( selected_columns)

# 注意：不能使用.iloc选择非整数索引的列
# selected_columns = df.iloc[:, 'A']  # 这是错误的用法

       A  B
one    1  4
two    2  5
three  3  6
<class 'pandas.core.frame.DataFrame'>      A  B
one  1  4
two  2  5
<class 'pandas.core.series.Series'>
A    1
B    4
Name: one, dtype: int64
<class 'pandas.core.frame.DataFrame'>        A  B
one    1  4
two    2  5
three  3  6
------iloc[[0,2]]----
<class 'pandas.core.frame.DataFrame'>
       A  B
one    1  4
three  3  6
------df.iloc[:, [1, 1]]----
<class 'pandas.core.frame.DataFrame'>
       A  B  B
one    1  4  4
two    2  5  5
three  3  6  6
------df.iloc[:, 0]----
<class 'pandas.core.series.Series'>
one      4
two      5
three    6
Name: B, dtype: int64


In [30]:
import pandas as pd
import numpy as np

# 设置随机种子以获得可重复的结果
np.random.seed(42)

# 生成日期索引
date_rng = pd.date_range('2024-01-01', periods=100)

# 生成仿真的OHLC数据
# 假设开盘价是基础价格，收盘价和最高价略高于开盘价，最低价略低于开盘价
open_prices = np.random.randint(100, 200, size=100)
high_prices = open_prices * (1 + np.random.uniform(0.01, 0.05))  # 比开盘价高1%-5%
low_prices = open_prices * (1 - np.random.uniform(0.01, 0.05))  # 比开盘价低1%-5%
close_prices = high_prices * (1 - np.random.uniform(0.005, 0.02))  # 收盘价在最高价之下

# 创建DataFrame
ohlc_df = pd.DataFrame({
    'Open': open_prices,
    'High': high_prices,
    'Low': low_prices,
    'Close': close_prices
}, index=date_rng)

# 显示生成的DataFrame的前几行
print(ohlc_df)

            Open        High         Low       Close
2024-01-01   151  154.664790  147.793156  152.632427
2024-01-02   192  196.659866  187.922423  194.075668
2024-01-03   114  116.766795  111.578939  115.232428
2024-01-04   171  175.150193  167.368408  172.848642
2024-01-05   160  163.883221  156.602019  161.729723
...          ...         ...         ...         ...
2024-04-05   184  188.465704  180.092322  185.989182
2024-04-06   179  183.344354  175.198509  180.935128
2024-04-07   181  185.392894  177.156034  182.956750
2024-04-08   152  155.689060  148.771918  153.643237
2024-04-09   123  125.985226  120.387802  124.329725

[100 rows x 4 columns]


In [36]:
filtered_df = ohlc_df.loc[(ohlc_df['Close'] > ohlc_df['Open'])]
filtered_df

Unnamed: 0,Open,High,Low,Close
2024-01-01,151,154.664790,147.793156,152.632427
2024-01-02,192,196.659866,187.922423,194.075668
2024-01-03,114,116.766795,111.578939,115.232428
2024-01-04,171,175.150193,167.368408,172.848642
2024-01-05,160,163.883221,156.602019,161.729723
...,...,...,...,...
2024-04-05,184,188.465704,180.092322,185.989182
2024-04-06,179,183.344354,175.198509,180.935128
2024-04-07,181,185.392894,177.156034,182.956750
2024-04-08,152,155.689060,148.771918,153.643237


In [35]:
# Pandas的query()方法允许你使用字符串表达式来筛选数据，这可以使代码更加简洁。
filtered_df = ohlc_df.query('not (Close > Open and Open >150)')
filtered_df

Unnamed: 0,Open,High,Low,Close
2024-01-03,114,116.766795,111.578939,115.232428
2024-01-06,120,122.912416,117.451514,121.297293
2024-01-13,123,125.985226,120.387802,124.329725
2024-01-14,102,104.475554,99.833787,103.102699
2024-01-15,121,123.936686,118.430277,122.308103
2024-01-17,101,103.451283,98.855025,102.091888
2024-01-19,129,132.130847,126.260378,130.39459
2024-01-20,137,140.325008,134.090479,138.481076
2024-01-21,101,103.451283,98.855025,102.091888
2024-01-24,120,122.912416,117.451514,121.297293


In [31]:
filtered_df = ohlc_df.loc[(ohlc_df['Close'] > ohlc_df['Open'])]
filtered_df

Unnamed: 0,Open,High,Low,Close
2024-01-01,151,154.664790,147.793156,152.632427
2024-01-02,192,196.659866,187.922423,194.075668
2024-01-03,114,116.766795,111.578939,115.232428
2024-01-04,171,175.150193,167.368408,172.848642
2024-01-05,160,163.883221,156.602019,161.729723
...,...,...,...,...
2024-04-05,184,188.465704,180.092322,185.989182
2024-04-06,179,183.344354,175.198509,180.935128
2024-04-07,181,185.392894,177.156034,182.956750
2024-04-08,152,155.689060,148.771918,153.643237


In [1]:
import pywencai
import pandas as pd
import numpy as np
import akshare as ak
from datetime import datetime, timedelta

df = ak.stock_zh_a_spot_em()
# 两市剔除ST股、剔除科创板股、剔除北交所股
df = df[~df['代码'].astype(str).str.startswith('4')]
df = df[~df['代码'].astype(str).str.startswith('8')]
df = df[~df['代码'].astype(str).str.startswith('68')]
df = df[~df['名称'].astype(str).str.startswith('N')]
df = df[~df['名称'].astype(str).str.startswith('*')]
df = df[~df['名称'].astype(str).str.startswith('ST')]
# 按成交额降序排序，选出开盘金额最大前N只股
df = df.sort_values(by='成交额', ascending=False)
df = df.head(300)
df['代码'] = df['代码'].astype(str).str[:6]