Skip to content

Commit

Permalink
target-i386: do not memcpy in and out of xmm_regs
Browse files Browse the repository at this point in the history
After the next patch, we will move the high parts of AVX and AVX512 registers
in the same array as the SSE registers.  This will make it impossible to
memcpy an array of 128-bit values in and out of xmm_regs in one swoop.
Use a for loop instead.

Similarly, always use XMM_Q in translate.c.  This avoids introducing bugs
such as the one fixed in the previous patch.

Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  • Loading branch information
bonzini committed Jan 14, 2015
1 parent 0795808 commit bee8188
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 10 deletions.
30 changes: 24 additions & 6 deletions target-i386/kvm.c
Expand Up @@ -1019,7 +1019,10 @@ static int kvm_put_fpu(X86CPU *cpu)
fpu.ftwx |= (!env->fptags[i]) << i;
}
memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
for (i = 0; i < CPU_NB_REGS; i++) {
stq_p(&fpu.xmm[i][0], env->xmm_regs[i].XMM_Q(0));
stq_p(&fpu.xmm[i][8], env->xmm_regs[i].XMM_Q(1));
}
fpu.mxcsr = env->mxcsr;

return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu);
Expand All @@ -1045,6 +1048,7 @@ static int kvm_put_xsave(X86CPU *cpu)
CPUX86State *env = &cpu->env;
struct kvm_xsave* xsave = env->kvm_xsave_buf;
uint16_t cwd, swd, twd;
uint8_t *xmm;
int i, r;

if (!kvm_has_xsave()) {
Expand All @@ -1065,8 +1069,6 @@ static int kvm_put_xsave(X86CPU *cpu)
memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
sizeof env->fpregs);
memcpy(&xsave->region[XSAVE_XMM_SPACE], env->xmm_regs,
sizeof env->xmm_regs);
xsave->region[XSAVE_MXCSR] = env->mxcsr;
*(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
Expand All @@ -1079,6 +1081,13 @@ static int kvm_put_xsave(X86CPU *cpu)
sizeof env->opmask_regs);
memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs,
sizeof env->zmmh_regs);

xmm = (uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
stq_p(xmm, env->xmm_regs[i].XMM_Q(0));
stq_p(xmm+8, env->xmm_regs[i].XMM_Q(1));
}

#ifdef TARGET_X86_64
memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs,
sizeof env->hi16_zmm_regs);
Expand Down Expand Up @@ -1384,7 +1393,10 @@ static int kvm_get_fpu(X86CPU *cpu)
env->fptags[i] = !((fpu.ftwx >> i) & 1);
}
memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
for (i = 0; i < CPU_NB_REGS; i++) {
env->xmm_regs[i].XMM_Q(0) = ldq_p(&fpu.xmm[i][0]);
env->xmm_regs[i].XMM_Q(1) = ldq_p(&fpu.xmm[i][8]);
}
env->mxcsr = fpu.mxcsr;

return 0;
Expand All @@ -1395,6 +1407,7 @@ static int kvm_get_xsave(X86CPU *cpu)
CPUX86State *env = &cpu->env;
struct kvm_xsave* xsave = env->kvm_xsave_buf;
int ret, i;
const uint8_t *xmm;
uint16_t cwd, swd, twd;

if (!kvm_has_xsave()) {
Expand All @@ -1421,8 +1434,6 @@ static int kvm_get_xsave(X86CPU *cpu)
env->mxcsr = xsave->region[XSAVE_MXCSR];
memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
sizeof env->fpregs);
memcpy(env->xmm_regs, &xsave->region[XSAVE_XMM_SPACE],
sizeof env->xmm_regs);
env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
sizeof env->ymmh_regs);
Expand All @@ -1434,6 +1445,13 @@ static int kvm_get_xsave(X86CPU *cpu)
sizeof env->opmask_regs);
memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256],
sizeof env->zmmh_regs);

xmm = (const uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
}

#ifdef TARGET_X86_64
memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM],
sizeof env->hi16_zmm_regs);
Expand Down
8 changes: 4 additions & 4 deletions target-i386/translate.c
Expand Up @@ -2621,10 +2621,10 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)

static inline void gen_op_movo(int d_offset, int s_offset)
{
tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(0)));
tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(0)));
tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(1)));
tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(1)));
}

static inline void gen_op_movq(int d_offset, int s_offset)
Expand Down

0 comments on commit bee8188

Please sign in to comment.